Source code for eeweather.ranking

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
   Copyright 2018 Open Energy Efficiency, Inc.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""

import pandas as pd
import numpy as np
import pyproj

import eeweather.mockable
from .exceptions import ISDDataNotAvailableError
from .connections import metadata_db_connection_proxy
from .geo import get_lat_long_climate_zones
from .stations import ISDStation
from .utils import lazy_property
from .warnings import EEWeatherWarning

__all__ = ("rank_stations", "combine_ranked_stations", "select_station")

class CachedData(object):
    """Holder for station metadata read from the metadata database."""

    # NOTE(review): ``rank_stations`` reads ``cached_data.all_station_metadata``
    # as an attribute (no call) and ``lazy_property`` is imported from
    # ``.utils``, so this is presumably a lazily computed, cached property.
    # The decorator line was missing from the reviewed text -- confirm.
    @lazy_property
    def all_station_metadata(self):
        """Return one row of metadata per ISD station as a DataFrame.

        Returns
        -------
        df : pandas.DataFrame
            Indexed by ``usaf_id`` with the columns ``latitude``,
            ``longitude``, ``iecc_climate_zone``, ``iecc_moisture_regime``,
            ``ba_climate_zone``, ``ca_climate_zone``, ``rough_quality``,
            ``elevation``, ``state``, ``tmy3_class``, ``is_tmy3`` and
            ``is_cz2010``.
        """
        conn = metadata_db_connection_proxy.get_connection()
        cur = conn.cursor()
        # NOTE(review): the leading ``select isd.usaf_id``, the ``from``
        # keyword and the ``order by`` key were not present in the reviewed
        # text; they are reconstructed from the join keys -- confirm against
        # the database schema.
        cur.execute(
            """
          select
            isd.usaf_id
            , isd.latitude
            , isd.longitude
            , isd.iecc_climate_zone
            , isd.iecc_moisture_regime
            , isd.ba_climate_zone
            , isd.ca_climate_zone
            , isd.quality as rough_quality
            , isd.elevation
            , isd.state
            , tmy3.class as tmy3_class
            , tmy3.usaf_id is not null as is_tmy3
            , cz2010.usaf_id is not null as is_cz2010
          from
            isd_station_metadata as isd
            left join cz2010_station_metadata as cz2010 on
              isd.usaf_id = cz2010.usaf_id
            left join tmy3_station_metadata as tmy3 on
              isd.usaf_id = tmy3.usaf_id
          order by
            isd.usaf_id
        """
        )

        # Name each value after its cursor column so the frame's columns
        # follow the aliases used in the query. Passing ``columns`` keeps the
        # frame well-formed even when the query returns no rows.
        df = pd.DataFrame(
            [
                {col[0]: val for col, val in zip(cur.description, row)}
                for row in cur.fetchall()
            ],
            columns=[
                "usaf_id",
                "latitude",
                "longitude",
                "iecc_climate_zone",
                "iecc_moisture_regime",
                "ba_climate_zone",
                "ca_climate_zone",
                "rough_quality",
                "elevation",
                "state",
                "tmy3_class",
                "is_tmy3",
                "is_cz2010",
            ],
        ).set_index("usaf_id")

        # Normalise dtypes: numeric coordinates/elevation, boolean flags.
        df["latitude"] = df.latitude.astype(float)
        df["longitude"] = df.longitude.astype(float)
        df["elevation"] = df.elevation.astype(float)
        df["is_tmy3"] = df.is_tmy3.astype(bool)
        df["is_cz2010"] = df.is_cz2010.astype(bool)
        return df


# Module-level instance shared by the ranking functions in this module.
cached_data = CachedData()

def _combine_filters(filters, index):
    """AND together an iterable of boolean masks.

    Starts from an all-``True`` series over ``index`` and folds each mask in
    ``filters`` into it with in-place ``&``; with no filters every entry of
    the result stays ``True``.
    """
    mask = pd.Series(data=True, index=index)
    for condition in filters:
        mask &= condition
    return mask

# Acceptable ``rough_quality`` values for each ``minimum_quality`` setting.
_ROUGH_QUALITY_AT_LEAST = {
    "low": ["high", "medium", "low"],
    "medium": ["high", "medium"],
    "high": ["high"],
}

# Acceptable ``tmy3_class`` values for each ``minimum_tmy3_class`` setting.
_TMY3_CLASS_AT_LEAST = {
    "III": ["I", "II", "III"],
    "II": ["I", "II"],
    "I": ["I"],
}

# Columns of the returned dataframe, in order.
_RANKED_COLUMNS = [
    "rank",
    "distance_meters",
    "latitude",
    "longitude",
    "iecc_climate_zone",
    "iecc_moisture_regime",
    "ba_climate_zone",
    "ca_climate_zone",
    "rough_quality",
    "elevation",
    "state",
    "tmy3_class",
    "is_tmy3",
    "is_cz2010",
    "difference_elevation_meters",
]


def _match_site_value(column, site_value):
    """Mask of rows whose ``column`` equals ``site_value`` (null when it is None)."""
    if site_value is None:
        return column.isnull()
    return column == site_value


def rank_stations(
    site_latitude,
    site_longitude,
    site_state=None,
    site_elevation=None,
    match_iecc_climate_zone=False,
    match_iecc_moisture_regime=False,
    match_ba_climate_zone=False,
    match_ca_climate_zone=False,
    match_state=False,
    minimum_quality=None,
    minimum_tmy3_class=None,
    max_distance_meters=None,
    max_difference_elevation_meters=None,
    is_tmy3=None,
    is_cz2010=None,
):
    """ Get a ranked, filtered set of candidate weather stations and metadata
    for a particular site.

    Parameters
    ----------
    site_latitude : float
        Latitude of target site for which to find candidate weather stations.
    site_longitude : float
        Longitude of target site for which to find candidate weather stations.
    site_state : str, 2 letter abbreviation
        US state of target site, used optionally to filter potential candidate
        weather stations. Ignored unless ``match_state=True``.
    site_elevation : float
        Elevation of target site in meters, used optionally to filter
        potential candidate weather stations. Ignored unless
        ``max_difference_elevation_meters`` is set.
    match_iecc_climate_zone : bool
        If ``True``, filter candidate weather stations to those
        matching the IECC climate zone of the target site.
    match_iecc_moisture_regime : bool
        If ``True``, filter candidate weather stations to those
        matching the IECC moisture regime of the target site.
    match_ba_climate_zone : bool
        If ``True``, filter candidate weather stations to those
        matching the Building America climate zone of the target site.
    match_ca_climate_zone : bool
        If ``True``, filter candidate weather stations to those
        matching the CA climate zone of the target site.
    match_state : bool
        If ``True``, filter candidate weather stations to those
        matching the US state of the target site, as specified by
        ``site_state``.
    minimum_quality : str, ``'high'``, ``'medium'``, ``'low'``
        If given, filter candidate weather stations to those meeting or
        exceeding the given quality, as summarized by the frequency and
        availability of observations in the NOAA Integrated Surface Database.
        Any other value applies no quality filter.
    minimum_tmy3_class : str, ``'I'``, ``'II'``, ``'III'``
        If given, filter candidate weather stations to those meeting or
        exceeding the given class, as reported in the NREL TMY3 metadata.
        Any other value applies no class filter.
    max_distance_meters : float
        If given, filter candidate weather stations to those within the
        ``max_distance_meters`` of the target site location.
    max_difference_elevation_meters : float
        If given, filter candidate weather stations to those with elevations
        within ``max_difference_elevation_meters`` of the target site
        elevation. Only applied when ``site_elevation`` is also given.
    is_tmy3 : bool
        If given, filter candidate weather stations to those for which TMY3
        normal year temperature data is available (``True``) or not available
        (``False``).
    is_cz2010 : bool
        If given, filter candidate weather stations to those for which CZ2010
        normal year temperature data is available (``True``) or not available
        (``False``).

    Returns
    -------
    ranked_filtered_candidates : :any:`pandas.DataFrame`
        Index is ``usaf_id``. Each row contains a potential weather station
        match and metadata. Contains the following columns:

        - ``rank``: Rank of weather station match for the target site.
        - ``distance_meters``: Distance from target site to weather station
          site.
        - ``latitude``: Latitude of weather station site.
        - ``longitude``: Longitude of weather station site.
        - ``iecc_climate_zone``: IECC Climate Zone ID (1-8)
        - ``iecc_moisture_regime``: IECC Moisture Regime ID (A-C)
        - ``ba_climate_zone``: Building America climate zone name
        - ``ca_climate_zone``: California climate zone number
        - ``rough_quality``: Approximate measure of frequency of ISD
          observations data at weather station.
        - ``elevation``: Elevation of weather station site, if available.
        - ``state``: US state of weather station site, if applicable.
        - ``tmy3_class``: Weather station class as reported by NREL TMY3, if
          available
        - ``is_tmy3``: Weather station has associated TMY3 data.
        - ``is_cz2010``: Weather station has associated CZ2010 data.
        - ``difference_elevation_meters``: Absolute difference in meters
          between target site elevation and weather station elevation, if
          available.

    """
    # Work on a copy: the columns added below are specific to this call and
    # must not leak into the metadata object shared by every caller.
    candidates = cached_data.all_station_metadata.copy()

    # compute distances (only for stations whose coordinates are known)
    candidates_defined_lat_long = candidates[
        candidates.latitude.notnull() & candidates.longitude.notnull()
    ]
    candidates_latitude = candidates_defined_lat_long.latitude
    candidates_longitude = candidates_defined_lat_long.longitude
    tiled_site_latitude = np.tile(site_latitude, candidates_latitude.shape)
    tiled_site_longitude = np.tile(site_longitude, candidates_longitude.shape)
    geod = pyproj.Geod(ellps="WGS84")
    # The third element of the inverse geodesic result is the distance.
    dists = geod.inv(
        tiled_site_longitude,
        tiled_site_latitude,
        candidates_longitude.values,
        candidates_latitude.values,
    )[2]
    # Stations without coordinates get a missing distance after reindexing.
    candidates["distance_meters"] = pd.Series(
        dists, index=candidates_defined_lat_long.index
    ).reindex(candidates.index)

    if site_elevation is not None:
        difference_elevation_meters = (candidates.elevation - site_elevation).abs()
    else:
        difference_elevation_meters = None
    candidates["difference_elevation_meters"] = difference_elevation_meters

    site_climate_zones = get_lat_long_climate_zones(site_latitude, site_longitude)

    # create filters
    filters = []

    zone_matches = (
        (match_iecc_climate_zone, "iecc_climate_zone"),
        (match_iecc_moisture_regime, "iecc_moisture_regime"),
        (match_ba_climate_zone, "ba_climate_zone"),
        (match_ca_climate_zone, "ca_climate_zone"),
    )
    for enabled, zone_name in zone_matches:
        if enabled:
            filters.append(
                _match_site_value(candidates[zone_name], site_climate_zones[zone_name])
            )

    if match_state:
        filters.append(_match_site_value(candidates.state, site_state))

    if is_tmy3 is not None:
        filters.append(candidates.is_tmy3.isin([is_tmy3]))

    if is_cz2010 is not None:
        filters.append(candidates.is_cz2010.isin([is_cz2010]))

    # Unrecognised values fall through without adding a filter.
    accepted_qualities = _ROUGH_QUALITY_AT_LEAST.get(minimum_quality)
    if accepted_qualities is not None:
        filters.append(candidates.rough_quality.isin(accepted_qualities))

    accepted_tmy3_classes = _TMY3_CLASS_AT_LEAST.get(minimum_tmy3_class)
    if accepted_tmy3_classes is not None:
        filters.append(candidates.tmy3_class.isin(accepted_tmy3_classes))

    if max_distance_meters is not None:
        filters.append(candidates.distance_meters <= max_distance_meters)

    if max_difference_elevation_meters is not None and site_elevation is not None:
        filters.append(
            candidates.difference_elevation_meters <= max_difference_elevation_meters
        )

    combined_filters = _combine_filters(filters, candidates.index)
    filtered_candidates = candidates[combined_filters]
    ranked_filtered_candidates = filtered_candidates.sort_values(by=["distance_meters"])

    # add rank column (1 = closest station)
    ranks = range(1, 1 + len(ranked_filtered_candidates))
    ranked_filtered_candidates.insert(0, "rank", ranks)

    return ranked_filtered_candidates[_RANKED_COLUMNS]
def combine_ranked_stations(rankings):
    """ Combine :any:`pandas.DataFrame` s of candidate weather stations to form
    a hybrid ranking dataframe.

    Stations are kept in the order of the first ranking in which they appear;
    an index value already contributed by an earlier ranking is dropped from
    later ones. The input dataframes are never modified.

    Parameters
    ----------
    rankings : list of :any:`pandas.DataFrame`
        Dataframes of ranked weather station candidates and metadata.
        All ranking dataframes should have the same columns and must be
        sorted by rank.

    Returns
    -------
    ranked_filtered_candidates : :any:`pandas.DataFrame`
        Dataframe has a rank column and the same columns given in the source
        dataframes.

    Raises
    ------
    ValueError
        If ``rankings`` is empty.
    """
    if len(rankings) == 0:
        raise ValueError("Requires at least one ranking.")

    pieces = [rankings[0]]
    seen = rankings[0].index
    for ranking in rankings[1:]:
        unseen = ranking[~ranking.index.isin(seen)]
        pieces.append(unseen)
        seen = seen.append(unseen.index)

    # A single concat builds a new frame, so re-numbering ``rank`` below
    # cannot overwrite the caller's dataframe (which used to happen when only
    # one ranking was supplied).
    combined_ranking = pd.concat(pieces)
    combined_ranking["rank"] = range(1, 1 + len(combined_ranking))
    return combined_ranking
@eeweather.mockable.mockable()
def load_isd_hourly_temp_data(
    station, start_date, end_date, fetch_from_web
):  # pragma: no cover
    """Load hourly ISD temperature data for ``station`` over a date range.

    Thin pass-through to ``station.load_isd_hourly_temp_data``; the dates are
    forwarded positionally and ``fetch_from_web`` as a keyword argument. The
    station's return value is handed back unchanged.
    """
    loaded = station.load_isd_hourly_temp_data(
        start_date, end_date, fetch_from_web=fetch_from_web
    )
    return loaded
def select_station(
    candidates,
    coverage_range=None,
    min_fraction_coverage=0.9,
    distance_warnings=(50000, 200000),
    rank=1,
    fetch_from_web=True,
):
    """ Select a station from a list of candidates that meets given data
    quality criteria.

    Candidates are visited in dataframe order; the ``rank``-th candidate that
    passes the coverage test is returned.

    Parameters
    ----------
    candidates : :any:`pandas.DataFrame`
        A dataframe of the form given by :any:`eeweather.rank_stations` or
        :any:`eeweather.combine_ranked_stations`, specifically having at least
        an index with ``usaf_id`` values and the column ``distance_meters``.
    coverage_range : tuple of (start_date, end_date), optional
        If given, hourly ISD temperature data is loaded over this range for
        each candidate and its non-null fraction is tested. If ``None``,
        no data is loaded and every candidate passes.
    min_fraction_coverage : float
        A candidate passes only when its fraction of non-null hourly values
        is strictly greater than this value.
    distance_warnings : iterable of float
        Distance thresholds, compared against ``distance_meters``. One
        warning is attached to the result for every threshold that the
        selected station's distance exceeds.
    rank : int
        1-based position, among passing candidates, of the station to select.
    fetch_from_web : bool
        Forwarded to the hourly temperature data loader.

    Returns
    -------
    isd_station, warnings : tuple of (:any:`eeweather.ISDStation`, list)
        A qualified weather station together with the warnings collected for
        it. The station is ``None`` if no station meets criteria, in which
        case the list holds a single "no weather station selected" warning.
    """

    def _test_station(station):
        # Without a coverage range there is nothing to check.
        if coverage_range is None:
            return True, []

        start_date, end_date = coverage_range
        try:
            tempC, warnings = eeweather.mockable.load_isd_hourly_temp_data(
                station, start_date, end_date, fetch_from_web
            )
        except ISDDataNotAvailableError:
            return False, []  # reject

        # TODO(philngo): also need to incorporate within-day limits
        if len(tempC) == 0:
            return False, []  # reject

        fraction_coverage = tempC.notnull().sum() / float(len(tempC))
        return (fraction_coverage > min_fraction_coverage), warnings

    def _distance_warnings(distance_meters):
        # NOTE: the adjacent literals below join without a space
        # ("...greaterthan..."); the text is kept as-is because callers may
        # compare against the exact description.
        return [
            EEWeatherWarning(
                qualified_name="eeweather.exceeds_maximum_distance",
                description=(
                    "Distance from target to weather station is greater"
                    "than the specified km."
                ),
                data={
                    "distance_meters": distance_meters,
                    "max_distance_meters": d,
                    "rank": rank,
                },
            )
            for d in distance_warnings
            if distance_meters > d
        ]

    n_stations_passed = 0
    for usaf_id, row in candidates.iterrows():
        station = ISDStation(usaf_id)
        test_result, warnings = _test_station(station)
        # Only a station that itself passed the test may be counted or
        # returned.
        if not test_result:
            continue
        n_stations_passed += 1
        if n_stations_passed == rank:
            # Copy so the list handed back by the loader is not mutated.
            selected_warnings = list(warnings) if warnings else []
            selected_warnings.extend(_distance_warnings(row.distance_meters))
            return station, selected_warnings

    no_station_warning = EEWeatherWarning(
        qualified_name="eeweather.no_weather_station_selected",
        description=(
            "No weather station found with the specified rank and"
            " minimum fracitional coverage."
        ),
        data={"rank": rank, "min_fraction_coverage": min_fraction_coverage},
    )
    return None, [no_station_warning]