Source code for groclient.experimental

import pandas as pd

from typing import Dict, List

from groclient.client import GroClient
from groclient import lib
from groclient.constants import V2_DATA_DESCRIPTION_PREFIX, V2_DATA_DESCRIPTION_ATTRS

class Experimental(GroClient):
    """Client exposing experimental functions with a better user experience.

    The experimental client will introduce a range of experimental functions
    with better user experience. While you will be able to access better
    performance and new features at an early stage, you should be aware that
    things might change (e.g. response format).
    """

    def get_data_points(self, **selections: Dict) -> List[Dict]:
        """This function is a mirror of existing :meth:`~groclient.GroClient.get_data_points`, but with limited scope.

        For example:

        - "Gro derived on-the-fly" is under development.
        - Many sources are still under migration (please refer to internal
          confluence page for source migration timeline)

        Parameters
        ----------
        metric_id : integer
            How something is measured. e.g. "Export Value" or "Area Harvested"
        item_ids : integer or list of integers
            What is being measured. e.g. "Corn" or "Rainfall"
        region_ids : integer or list of integers
            Where something is being measured e.g. "United States Corn Belt" or "China"
        partner_region_ids : integer or list of integers, optional
            partner_region refers to an interaction between two regions, like trade or
            transportation. For example, for an Export metric, the "region" would be the exporter
            and the "partner_region" would be the importer. For most series, this can be excluded
            or set to 0 ("World") by default.
        source_id : integer
        frequency_id : integer
        unit_id : integer, optional
        start_date : string, optional
            All data points with end dates after this date.
        end_date : string, optional
            All data points with start dates before this date.
        coverage_threshold : float, optional
            Custom threshold on the coverage of geospatial data. Value should be between 0 and 1.

        Returns
        -------
        list of dicts
            Each dictionary contains a list of ``data_points`` and a
            ``series_description``. ``start_timestamp`` and ``end_timestamp``
            of every data point are converted to ``int`` before returning.

            Example::

                from groclient.experimental import Experimental

                exp_client = Experimental(access_token="your_token_here")
                exp_client.get_data_points(
                    **{
                        'metric_id': 2540047,
                        'item_ids': [3457],
                        'region_ids': [100023971, 100023990],
                        'frequency_id': 1,
                        'source_id': 26,
                        'start_date': '2021-12-20',
                        'end_date': '2021-12-21',
                    }
                )

            Returns::

                [
                    {
                        "data_points": [
                            {
                                "value": 33.20465087890625,
                                "start_timestamp": 1639958400,
                                "end_timestamp": 1640044800
                            }
                        ],
                        "series_description": {
                            "source_id": 26,
                            "item_id": 3457,
                            "metric_id": 2540047,
                            "frequency_id": 1,
                            "region_id": 100023971,
                            "unit_id": 36
                        }
                    },
                    {
                        "data_points": [
                            {
                                "value": 32.73432922363281,
                                "start_timestamp": 1639958400,
                                "end_timestamp": 1640044800
                            }
                        ],
                        "series_description": {
                            "source_id": 26,
                            "item_id": 3457,
                            "metric_id": 2540047,
                            "frequency_id": 1,
                            "region_id": 100023990,
                            "unit_id": 36
                        }
                    }
                ]
        """
        data_stream_list = lib.get_data_points_v2_prime(
            self.access_token, self.api_host, **selections
        )

        # Timestamps arrive as strings (due to the issue in javascript when
        # dealing with 'int64'), so convert them from str to int here.
        for data_stream in data_stream_list:
            for data_point in data_stream['data_points']:
                data_point['start_timestamp'] = int(data_point['start_timestamp'])
                data_point['end_timestamp'] = int(data_point['end_timestamp'])

        return data_stream_list

    def get_data_points_df(self, **selections: Dict) -> pd.DataFrame:
        """Fetch the same data as :meth:`~groclient.Experimental.get_data_points` and return it as a combined dataframe.

        Parameters
        ----------
        metric_id : integer
            How something is measured. e.g. "Export Value" or "Area Harvested"
        item_ids : integer or list of integers
            What is being measured. e.g. "Corn" or "Rainfall"
        region_ids : integer or list of integers
            Where something is being measured e.g. "United States Corn Belt" or "China"
        partner_region_ids : integer or list of integers, optional
            partner_region refers to an interaction between two regions, like trade or
            transportation. For example, for an Export metric, the "region" would be the exporter
            and the "partner_region" would be the importer. For most series, this can be excluded
            or set to 0 ("World") by default.
        source_id : integer
        frequency_id : integer
        unit_id : integer, optional
        start_date : string, optional
            All data points with end dates after this date.
        end_date : string, optional
            All data points with start dates before this date.
        coverage_threshold : float, optional
            Custom threshold on the coverage of geospatial data. Value should be between 0 and 1.

        Returns
        -------
        pandas.DataFrame
            The data points of every returned series, appended together into
            a single dataframe, one row per data point, with the series
            description attributes as additional numeric columns.

            Data point attributes in timestamp format (`start_timestamp`,
            `end_timestamp`) are converted from epoch seconds into pandas
            datetimes (displayed as `YYYY-MM-DD`). The column names are kept
            as `start_timestamp` and `end_timestamp`.

            An empty dataframe is returned as-is, without any conversion.

            Example::

                from groclient.experimental import Experimental

                exp_client = Experimental(access_token="your_token_here")
                exp_client.get_data_points_df(
                    **{
                        'metric_id': 2540047,
                        'item_ids': [3457],
                        'region_ids': [100023971, 100023990],
                        'frequency_id': 1,
                        'source_id': 26,
                        'start_date': '2021-12-20',
                        'end_date': '2021-12-21',
                    }
                )

            Returns::

                       value start_timestamp end_timestamp  metric_id  item_id  region_id  partner_region_id  frequency_id  source_id  unit_id
                0  33.204651      2021-12-20    2021-12-21    2540047     3457  100023971                NaN             1         26       36
                1  32.734329      2021-12-20    2021-12-21    2540047     3457  100023990                NaN             1         26       36
        """
        res = lib.get_data_points_v2_prime(
            self.access_token, self.api_host, **selections
        )

        # Each meta path is [prefix, attribute]; json_normalize names the
        # resulting column "<prefix>.<attribute>" (default separator ".").
        v2_data_description_meta = [
            [V2_DATA_DESCRIPTION_PREFIX, x] for x in V2_DATA_DESCRIPTION_ATTRS
        ]

        df = pd.json_normalize(
            res,
            record_path=['data_points'],
            meta=v2_data_description_meta,
            errors='ignore'
        )

        if not df.empty:
            ts_cols = ["start_timestamp", "end_timestamp"]
            # Cast to numbers first: parsing *string* epochs together with
            # ``unit`` is deprecated in pandas >= 2.1.
            df[ts_cols] = df[ts_cols].apply(pd.to_numeric)
            df[ts_cols] = df[ts_cols].apply(pd.to_datetime, unit="s")

            # Strip the "<prefix>." that json_normalize put on the meta
            # columns. ``regex=False`` keeps the "." literal regardless of the
            # pandas version (the default flipped from True to False in 2.0).
            column_prefix = V2_DATA_DESCRIPTION_PREFIX + '.'
            df.columns = df.columns.str.replace(column_prefix, '', regex=False)

            df[V2_DATA_DESCRIPTION_ATTRS] = df[V2_DATA_DESCRIPTION_ATTRS].apply(pd.to_numeric)

        return df