Source code for sentier_peakachu.data

import warnings
from datetime import datetime

import pandas as pd
import sentier_data_tools as sdt

from sentier_peakachu.entsoe import get_generation_data
from sentier_peakachu.iri_mapping import (
    DIRTY_BONSAI_PRODUCT_IRIS_MAPPING,
    DIRTY_TRACE_AGGREGATION,
    ENTSOE_PRODUCT_IRIS_MAPPING,
    TRACE_PRODUCT_IRIS_MAPPING,
)
from sentier_peakachu.utils_location import get_geonames_iri_from_iso_code



[docs]
def create_local_electricity_datastorage(reset: bool = True):
    """Fill the local data store with the electricity example datasets.

    Stores the ENTSO-E generation data for "DE" between 2022-10-08 and
    2022-10-09 (Europe/Brussels), then the power plant emission datasets
    and the Bonsai emission factor datasets.

    Args:
        reset: If true, ``sdt.reset_local_database()`` is called before any
            dataset is created.
    """
    if reset:
        sdt.reset_local_database()

    # Fixed one-day demo window, expressed in Brussels local time.
    zone = "Europe/Brussels"
    window_start, window_end = (
        pd.Timestamp(day, tz=zone) for day in ("20221008", "20221009")
    )
    create_country_mix_dataset("DE", window_start, window_end)

    create_plant_emission_datasets()
    create_bonsai_emission_factor_datasets()




[docs]
def create_country_mix_dataset(
    country_code: str, start_time: pd.Timestamp, end_time: pd.Timestamp
):
    """Fetch generation data for one country and save it as an ``sdt.Dataset``.

    The frame returned by ``get_generation_data`` gets its index turned into
    a time column, its columns renamed through ``ENTSOE_PRODUCT_IRIS_MAPPING``
    and is then stored with kind ``BOM``.

    Args:
        country_code: Code passed to ``get_generation_data`` and to
            ``get_geonames_iri_from_iso_code`` (the caller in this module
            uses ``"DE"``).
        start_time: Start of the requested period.
        end_time: End of the requested period.
    """
    # DF1
    project_url = "https://github.com/TimoDiepers/sentier_peakachu/"
    author = {
        "title": "Peakachu",
        "path": project_url,
        "role": "author",
    }
    package = sdt.Datapackage(
        name="electricity_markets",
        description="Electricity markets data from ENTSO-E",
        contributors=[author],
        homepage=project_url,
    )
    metadata = package.metadata()

    generation = get_generation_data(
        country_code=country_code,
        start=start_time,
        end=end_time,
    )
    # Name the index with the time IRI so reset_index() yields that column.
    generation.index.name = "https://vocab.sentier.dev/units/quantity-kind/Time"
    mix = generation.reset_index().rename(columns=ENTSOE_PRODUCT_IRIS_MAPPING)

    # Original note: values are MW, not MWh, but SKOSMOS has no such entry.
    technology_unit = "https://vocab.sentier.dev/units/unit/MegaW-HR"
    time_unit = "https://vocab.sentier.dev/units/unit/SEC"
    # One unit for the time column, then one per mapping entry.
    # NOTE(review): zip() below stops at the shorter sequence, so frame
    # columns beyond len(mapping) + 1 get no column entry - confirm intended.
    column_units = [time_unit]
    column_units.extend([technology_unit] * len(ENTSOE_PRODUCT_IRIS_MAPPING))

    column_specs = []
    for column_iri, unit_iri in zip(mix.columns, column_units):
        column_specs.append({"iri": column_iri, "unit": unit_iri})

    dataset = sdt.Dataset(
        name="electricity mixes",
        dataframe=mix,
        kind=sdt.DatasetKind.BOM,
        product="http://openenergy-platform.org/ontology/oeo/OEO_00000139",
        columns=column_specs,
        metadata=metadata,
        location=get_geonames_iri_from_iso_code(country_code),
        version=1,
        valid_from=datetime(2018, 1, 1),
        valid_to=datetime(2028, 1, 1),
    )
    dataset.save()




[docs]
def create_plant_emission_datasets():
    """Save power plant emission data as one dataset per country and source type.

    Reads ``../data/electricity-generation_emissions_sources.csv`` (relative
    to the current working directory), keeps the rows whose ``gas`` column
    equals ``"co2e_100yr"`` and groups them by ``iso3_country`` and
    ``source_type``. Each group is reduced to six columns, relabeled with
    the IRIs in ``COLUMNS_POWERPLANTS`` and saved as an ``sdt.Dataset``.

    Groups are skipped with a warning when the source type has no entry in
    ``TRACE_PRODUCT_IRIS_MAPPING`` or when no GeoNames IRI is found for the
    country.
    """
    # DF2
    metadata = sdt.Datapackage(
        name="emission data power plants",
        description="Climate trace emission data for power plants",
        contributors=[
            {
                "title": "Peakachu",
                "path": "https://github.com/TimoDiepers/sentier_peakachu/",
                "role": "author",
            },
        ],
        homepage="https://github.com/TimoDiepers/sentier_peakachu/",
    ).metadata()

    # CSV columns to keep, in the same order as COLUMNS_POWERPLANTS and
    # UNITS_POWERPLANTS below.
    SOURCE_COLUMNS = [
        "source_id",
        "source_name",
        "start_time",
        "end_time",
        "activity",
        "emissions_quantity",
    ]

    COLUMNS_POWERPLANTS = [
        "https://example.com/model-terms/identifier",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/name",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/start_time",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/end_time",  # to be added to SKOSMOS model terms
        "https://example.com/process-terms/powergeneration",  # to be added to SKOSMOS process terms
        "http://openenergy-platform.org/ontology/oeo/OEO_00260007",  # CO2 emission
    ]

    UNITS_POWERPLANTS = [
        "https://example.com/model-terms/integer",  # to be added to SKOSMOS model terms
        "https://example.com/model-terms/name",  # to be added to SKOSMOS model terms
        "https://vocab.sentier.dev/units/unit/SEC",
        "https://vocab.sentier.dev/units/unit/SEC",
        "https://vocab.sentier.dev/units/unit/MegaW-HR",
        "https://vocab.sentier.dev/units/unit/TON_Metric",
    ]

    trace_frame = pd.read_csv("../data/electricity-generation_emissions_sources.csv")
    co2e_rows = trace_frame[trace_frame["gas"] == "co2e_100yr"]

    for (country, source_type), group in co2e_rows.groupby(
        ["iso3_country", "source_type"]
    ):
        if source_type not in TRACE_PRODUCT_IRIS_MAPPING:
            warnings.warn(
                f"Source type {source_type} not found, skipping Dataset creation"
            )
            continue

        geonames_iri = get_geonames_iri_from_iso_code(country)
        if not geonames_iri:
            warnings.warn(
                f"Location not found for {country}, skipping Dataset creation"
            )
            continue

        # Copy before relabeling so the column assignment never touches a
        # view of the grouped frame.
        plant_df = group[SOURCE_COLUMNS].copy()
        plant_df.columns = COLUMNS_POWERPLANTS

        # The timestamps are read as strings and parsed below with a fixed
        # format; min/max therefore compare the strings themselves.
        valid_from_str = min(group["start_time"])
        valid_to_str = max(group["end_time"])

        sdt.Dataset(
            name=f"power plant data, {country}, {source_type}",
            dataframe=plant_df,
            kind=sdt.DatasetKind.BOM,
            product=TRACE_PRODUCT_IRIS_MAPPING[source_type],
            # Describe the columns of the frame that is actually stored
            # (the IRIs), not the raw CSV header of the group.
            columns=[
                {"iri": x, "unit": y}
                for x, y in zip(plant_df.columns, UNITS_POWERPLANTS)
            ],
            metadata=metadata,
            location=geonames_iri,
            version=1,
            valid_from=datetime.strptime(valid_from_str, "%Y-%m-%d %H:%M:%S"),
            valid_to=datetime.strptime(valid_to_str, "%Y-%m-%d %H:%M:%S"),
        ).save()




[docs]
def create_bonsai_emission_factor_datasets():
    """
    Create datasets for emission factors for different sources of electricity from the bonsai database,
    splitting emissions in direct and indirect emissions.
    Unit is kg CO2-eq/kWh.

    Reads ``../data/bonsai_emission_factors.csv`` (``;``-delimited, relative
    to the current working directory), groups the rows by ``region_code``
    and ``description`` and saves one ``sdt.Dataset`` per group. Groups are
    skipped with a warning when no GeoNames IRI is found for the region or
    when the description has no entry in ``DIRTY_BONSAI_PRODUCT_IRIS_MAPPING``.
    """

    metadata = sdt.Datapackage(
        name="emission factors for regional electricity producing technologies",
        description="Bonsai emission factor data for regional electricity producing technologies",
        contributors=[
            {
                "title": "Karin Treyer",
                "path": "https://www.psi.ch/en/ta/people/karin-treyer",
                "role": "author",
            },
            {
                "title": "Chris Mutel",
                "path": "https://chris.mutel.org/",
                "role": "wrangler",
            },
        ],
        homepage="https://example.com/additional_inventories",
    ).metadata()

    UNITS_EMISSION_FACTORS = [
        "https://example.com/units/kgCO2eqPerkWh",
        "https://example.com/units/kgCO2eqPerkWh",
    ]
    COLUMNS_EMISSION_FACTORS = [
        "https://example.com/direct_CO2_emissions",
        "https://example.com/indirect_CO2_emissions",
    ]

    # Validity window shared by every dataset. Passed as datetime objects,
    # like the other dataset builders in this module, instead of strings.
    valid_from = datetime(2016, 1, 1)  # Bonsai/EXIOBASE data from year 2016
    valid_to = datetime(2024, 12, 31)

    bonsai_frame = pd.read_csv("../data/bonsai_emission_factors.csv", delimiter=";")

    factor_rows = bonsai_frame[
        [
            "description",
            "region_code",
            "direct emission factor",
            "indirect emission factor",
        ]
    ]

    for (country, technology), group in factor_rows.groupby(
        ["region_code", "description"]
    ):
        geonames_iri = get_geonames_iri_from_iso_code(country)
        if not geonames_iri:
            warnings.warn(
                f"Location not found for {country}, skipping Dataset creation"
            )
            continue
        technology_iri = DIRTY_BONSAI_PRODUCT_IRIS_MAPPING.get(technology)
        if not technology_iri:
            warnings.warn(
                f"Technology {technology} not found, skipping Dataset creation"
            )
            continue

        # Copy before relabeling so the column assignment never touches a
        # view of the grouped frame.
        factors = group[["direct emission factor", "indirect emission factor"]].copy()
        factors.columns = COLUMNS_EMISSION_FACTORS

        sdt.Dataset(
            name=f"bonsai emission factors, {country}, {technology}",
            dataframe=factors,
            kind=sdt.DatasetKind.BOM,
            product=technology_iri,
            columns=[
                {"iri": x, "unit": y}
                for x, y in zip(factors.columns, UNITS_EMISSION_FACTORS)
            ],
            metadata=metadata,
            location=geonames_iri,
            version=1,
            valid_from=valid_from,
            valid_to=valid_to,
        ).save()