Source code for sentier_peakachu.data

import warnings
from datetime import datetime

import pandas as pd
import sentier_data_tools as sdt

from sentier_peakachu.entsoe import get_generation_data
from sentier_peakachu.iri_mapping import (
    DIRTY_BONSAI_PRODUCT_IRIS_MAPPING,
    DIRTY_TRACE_AGGREGATION,
    ENTSOE_PRODUCT_IRIS_MAPPING,
    TRACE_PRODUCT_IRIS_MAPPING,
)
from sentier_peakachu.utils_location import get_geonames_iri_from_iso_code


def create_local_electricity_datastorage(reset: bool = True):
    """Populate the local data store with all electricity datasets.

    Creates, in this order, the country mix dataset for ``"DE"`` over a fixed
    one-day window (2022-10-08 to 2022-10-09, ``Europe/Brussels``), the power
    plant emission datasets and the Bonsai emission factor datasets.

    Args:
        reset: When true, ``sdt.reset_local_database()`` is called before any
            dataset is created.
    """
    if reset:
        sdt.reset_local_database()

    # Fixed demo window; both bounds are timezone-aware timestamps.
    timezone = "Europe/Brussels"
    window_start = pd.Timestamp("20221008", tz=timezone)
    window_end = pd.Timestamp("20221009", tz=timezone)
    create_country_mix_dataset("DE", window_start, window_end)

    create_plant_emission_datasets()
    create_bonsai_emission_factor_datasets()
def create_country_mix_dataset(
    country_code: str, start_time: pd.Timestamp, end_time: pd.Timestamp
):
    """Fetch generation data for a country and save it as a dataset (DF1).

    The frame returned by ``get_generation_data`` gets its index named with
    the time quantity-kind IRI, is turned into a regular column, and has its
    columns renamed through ``ENTSOE_PRODUCT_IRIS_MAPPING``. The result is
    saved as an ``sdt.Dataset`` of kind ``BOM`` named ``"electricity mixes"``.

    Args:
        country_code: Passed to ``get_generation_data`` and used to look up
            the dataset location via ``get_geonames_iri_from_iso_code``.
        start_time: Start of the requested generation data window.
        end_time: End of the requested generation data window.

    Note:
        The stored validity range is fixed to 2018-01-01 .. 2028-01-01 and
        does not depend on ``start_time`` / ``end_time``.
    """
    # DF1
    package = sdt.Datapackage(
        name="electricity_markets",
        description="Electricity markets data from ENTSO-E",
        contributors=[
            {
                "title": "Peakachu",
                "path": "https://github.com/TimoDiepers/sentier_peakachu/",
                "role": "author",
            },
        ],
        homepage="https://github.com/TimoDiepers/sentier_peakachu/",
    )
    metadata = package.metadata()

    generation = get_generation_data(
        country_code=country_code,
        start=start_time,
        end=end_time,
    )
    # Name the index so that it becomes the time column after reset_index().
    generation.index.name = "https://vocab.sentier.dev/units/quantity-kind/Time"
    mix_frame = generation.reset_index().rename(columns=ENTSOE_PRODUCT_IRIS_MAPPING)

    time_unit = "https://vocab.sentier.dev/units/unit/SEC"
    # MW not Mwh but no entry in SKOSMOS
    technology_unit = "https://vocab.sentier.dev/units/unit/MegaW-HR"
    unit_iris = [time_unit, *([technology_unit] * len(ENTSOE_PRODUCT_IRIS_MAPPING))]

    # zip() stops at the shorter sequence: at most one time column plus
    # len(ENTSOE_PRODUCT_IRIS_MAPPING) further columns receive an entry.
    column_specs = [
        {"iri": column_iri, "unit": unit_iri}
        for column_iri, unit_iri in zip(mix_frame.columns, unit_iris)
    ]
    location_iri = get_geonames_iri_from_iso_code(country_code)

    dataset = sdt.Dataset(
        name="electricity mixes",
        dataframe=mix_frame,
        kind=sdt.DatasetKind.BOM,
        product="http://openenergy-platform.org/ontology/oeo/OEO_00000139",
        columns=column_specs,
        metadata=metadata,
        location=location_iri,
        version=1,
        valid_from=datetime(2018, 1, 1),
        valid_to=datetime(2028, 1, 1),
    )
    dataset.save()
def create_plant_emission_datasets():
    """Create one power plant emission dataset per country and source type (DF2).

    Reads ``../data/electricity-generation_emissions_sources.csv`` (the path
    is resolved against the current working directory), keeps the rows whose
    ``gas`` equals ``"co2e_100yr"`` and groups them by ``iso3_country`` and
    ``source_type``. For every group a six-column frame is saved as an
    ``sdt.Dataset`` of kind ``BOM``.

    Groups are skipped with a warning when the source type has no entry in
    ``TRACE_PRODUCT_IRIS_MAPPING`` or when no location IRI is found for the
    country code.

    The validity range of each dataset is the smallest ``start_time`` and the
    largest ``end_time`` of its group, parsed as ``"%Y-%m-%d %H:%M:%S"``.
    """
    # DF2
    metadata = sdt.Datapackage(
        name="emission data power plants",
        description="Climate trace emission data for power plants",
        contributors=[
            {
                "title": "Peakachu",
                "path": "https://github.com/TimoDiepers/sentier_peakachu/",
                "role": "author",
            },
        ],
        homepage="https://github.com/TimoDiepers/sentier_peakachu/",
    ).metadata()

    # CSV columns that are kept, in the order matching the IRIs/units below.
    SOURCE_COLUMNS = [
        "source_id",
        "source_name",
        "start_time",
        "end_time",
        "activity",
        "emissions_quantity",
    ]
    COLUMNS_POWERPLANTS = [
        "https://example.com/model-terms/identifier",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/name",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/start_time",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/end_time",  # to be added to SKOSMOS model terms
        "https://example.com/process-terms/powergeneration",  # to be added to SKOSMOS process terms
        "http://openenergy-platform.org/ontology/oeo/OEO_00260007",  # CO2 emission
    ]
    UNITS_POWERPLANTS = [
        "https://example.com/model-terms/integer",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/name",  # to be added to SKOSMOS model terms
        "https://vocab.sentier.dev/units/unit/SEC",
        "https://vocab.sentier.dev/units/unit/SEC",
        "https://vocab.sentier.dev/units/unit/MegaW-HR",
        "https://vocab.sentier.dev/units/unit/TON_Metric",
    ]

    trace_frame = pd.read_csv("../data/electricity-generation_emissions_sources.csv")
    co2e_frame = trace_frame[trace_frame["gas"] == "co2e_100yr"]

    for (country, source_type), group in co2e_frame.groupby(
        ["iso3_country", "source_type"]
    ):
        if source_type not in TRACE_PRODUCT_IRIS_MAPPING:
            warnings.warn(
                f"Source type {source_type} not found, skipping Dataset creation"
            )
            continue

        geonames_iri = get_geonames_iri_from_iso_code(country)
        if not geonames_iri:
            warnings.warn(
                f"Location not found for {country}, skipping Dataset creation"
            )
            continue

        # Work on an explicit copy so renaming the columns never touches the
        # frame the group was sliced from.
        plant_frame = group[SOURCE_COLUMNS].copy()
        plant_frame.columns = COLUMNS_POWERPLANTS

        valid_from_str = min(group["start_time"])
        valid_to_str = max(group["end_time"])

        sdt.Dataset(
            name=f"power plant data, {country}, {source_type}",
            dataframe=plant_frame,
            kind=sdt.DatasetKind.BOM,
            product=TRACE_PRODUCT_IRIS_MAPPING[source_type],
            # Describe the columns of the frame that is actually stored (the
            # renamed IRIs), not the raw CSV headers of the unfiltered group.
            columns=[
                {"iri": x, "unit": y}
                for x, y in zip(plant_frame.columns, UNITS_POWERPLANTS)
            ],
            metadata=metadata,
            location=geonames_iri,
            version=1,
            valid_from=datetime.strptime(valid_from_str, "%Y-%m-%d %H:%M:%S"),
            valid_to=datetime.strptime(valid_to_str, "%Y-%m-%d %H:%M:%S"),
        ).save()
def create_bonsai_emission_factor_datasets():
    """
    Create datasets for emission factors for different sources of electricity
    from the bonsai database, splitting emissions in direct and indirect
    emissions. Unit is kg CO2-eq/kWh.

    Reads ``../data/bonsai_emission_factors.csv`` (``;``-delimited; the path
    is resolved against the current working directory) and groups the rows by
    ``region_code`` and ``description``. For every group the direct and
    indirect emission factor columns are saved as an ``sdt.Dataset`` of kind
    ``BOM``.

    Groups are skipped with a warning when no location IRI is found for the
    region code or when the technology has no entry in
    ``DIRTY_BONSAI_PRODUCT_IRIS_MAPPING``.
    """
    metadata = sdt.Datapackage(
        name="emission factors for regional electricity producing technologies",
        description="Bonsai emission factor data for regional electricity producing technologies",
        contributors=[
            {
                "title": "Karin Treyer",
                "path": "https://www.psi.ch/en/ta/people/karin-treyer",
                "role": "author",
            },
            {
                "title": "Chris Mutel",
                "path": "https://chris.mutel.org/",
                "role": "wrangler",
            },
        ],
        homepage="https://example.com/additional_inventories",
    ).metadata()

    FACTOR_SOURCE_COLUMNS = ["direct emission factor", "indirect emission factor"]
    UNITS_EMISSION_FACTORS = [
        "https://example.com/units/kgCO2eqPerkWh",
        "https://example.com/units/kgCO2eqPerkWh",
    ]
    COLUMNS_EMISSION_FACTORS = [
        "https://example.com/direct_CO2_emissions",
        "https://example.com/indirect_CO2_emissions",
    ]

    # Same validity window for every dataset; passed as datetime objects like
    # the other dataset builders in this module do.
    valid_from = datetime(2016, 1, 1)  # Bonsai/EXIOBASE data from year 2016
    valid_to = datetime(2024, 12, 31)

    bonsai_frame = pd.read_csv("../data/bonsai_emission_factors.csv", delimiter=";")
    filtered_df = bonsai_frame[
        [
            "description",
            "region_code",
            "direct emission factor",
            "indirect emission factor",
        ]
    ]

    for (country, technology), group in filtered_df.groupby(
        ["region_code", "description"]
    ):
        geonames_iri = get_geonames_iri_from_iso_code(country)
        if not geonames_iri:
            warnings.warn(
                f"Location not found for {country}, skipping Dataset creation"
            )
            continue

        technology_iri = DIRTY_BONSAI_PRODUCT_IRIS_MAPPING.get(technology)
        if not technology_iri:
            warnings.warn(
                f"Technology {technology} not found, skipping Dataset creation"
            )
            continue

        # Work on an explicit copy so renaming the columns never touches the
        # frame the group was sliced from.
        factor_frame = group[FACTOR_SOURCE_COLUMNS].copy()
        factor_frame.columns = COLUMNS_EMISSION_FACTORS

        sdt.Dataset(
            name=f"bonsai emission factors, {country}, {technology}",
            dataframe=factor_frame,
            kind=sdt.DatasetKind.BOM,
            product=technology_iri,
            columns=[
                {"iri": x, "unit": y}
                for x, y in zip(factor_frame.columns, UNITS_EMISSION_FACTORS)
            ],
            metadata=metadata,
            location=geonames_iri,
            version=1,
            valid_from=valid_from,
            valid_to=valid_to,
        ).save()