
Add industry JRC data processing #354

Merged
merged 12 commits into from May 2, 2024
2 changes: 2 additions & 0 deletions Snakefile
@@ -16,6 +16,7 @@ techs_template_dir = f"{model_template_dir}techs/"

include: "./rules/shapes.smk"
include: "./rules/data.smk"
include: "./rules/jrc-idees.smk"

Member:
Not particularly important, but I moved the previous eurostat.smk and jrc-idees.smk into data.smk as the container for downloading and pre-processing all data that is not sector-specific. The idea was that we don't generate too many rule files, especially not rule files that aren't feature-based. You didn't like that idea?

Member Author:
I prefer one per source, as they become large enough rule files to be worth splitting off. It is also in line with the concept of modularising different major data sources.

Member Author:
One could imagine a future where we split off JRC processing completely and just store the pre-built files on zenodo for convenience.

include: "./rules/wind-and-solar.smk"
include: "./rules/biofuels.smk"
include: "./rules/hydro.smk"
@@ -25,6 +26,7 @@ include: "./rules/nuclear.smk"
include: "./rules/transport.smk"
include: "./rules/sync.smk"
include: "./rules/heat.smk"

min_version("7.8")
localrules: all, clean
wildcard_constraints:
1 change: 1 addition & 0 deletions config/default.yaml
@@ -40,6 +40,7 @@ data-pre-processing:
SRB: [HUN]
CYP: [ROU]
root-directory: .
max-threads: 2
cluster-sync:
url: euler.ethz.ch
send-ignore: .syncignore-send
3 changes: 3 additions & 0 deletions config/schema.yaml
@@ -144,6 +144,9 @@ properties:
root-directory:
type: string
description: Path to the root directory of euro-calliope containing scripts and template folders.
max-threads:

Member:
This parameter doesn't exist anymore and it should therefore not be in schema.yaml.

type: integer
description: Maximum number of threads available for multiprocessing, in those rules that can accept multiple threads.
cluster-sync:
type: object
description: Configuration for the "work local, build on remote" workflow.
104 changes: 95 additions & 9 deletions lib/eurocalliopelib/utils.py
@@ -1,6 +1,8 @@
"""Utility functions."""

import pandas as pd
import pycountry
import xarray as xr


def eu_country_code_to_iso3(eu_country_code):
@@ -16,10 +18,10 @@ def eu_country_code_to_iso3(eu_country_code):

def convert_country_code(input_country, output="alpha3"):
"""
Converts input country code or name into either a 2- or 3-letter code.

ISO alpha2: alpha2
ISO alpha2 with Eurostat codes: alpha2_eu
ISO alpha2 with Eurostat codes: alpha2_eurostat
ISO alpha3: alpha3

"""
@@ -36,7 +38,7 @@ def convert_country_code(input_country, output="alpha3"):
if output == "alpha2":
return pycountry.countries.lookup(input_country).alpha_2

if output == "alpha2_eu":
if output == "alpha2_eurostat":
result = pycountry.countries.lookup(input_country).alpha_2
if result == "GB":
return "UK"
@@ -49,12 +51,101 @@ def convert_country_code(input_country, output="alpha3"):
return pycountry.countries.lookup(input_country).alpha_3
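
For illustration, a minimal sketch of how the plain ISO output differs from the renamed Eurostat-style output (assuming the function above is in scope; only the GB→UK special case is shown in the diff):

```python
convert_country_code("United Kingdom", output="alpha2")           # "GB"
convert_country_code("United Kingdom", output="alpha2_eurostat")  # "UK"
convert_country_code("GB", output="alpha3")                       # "GBR"
```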


# conversion utils
def convert_valid_countries(country_codes: list, output: str = "alpha3") -> dict:
"""
Convert a list of country codes / names to a mapping from each valid input to a
uniform ISO country code. If an input item isn't a valid country (e.g. "EU27") then print the code and

Contributor:
I'd be careful about skipping exceptions by default, since this is a utility function.

I'd introduce a flag to ignore exceptions, with the default being to raise them. This helps avoid issues down the line, since exception skipping is made explicit in the calling code.

continue, instead of raising an exception.

Args:
country_codes (list):
Strings defining country codes / names
(["France", "FRA", "FR"] will all be treated the same)

Returns:
dict: Mapping from input country code/name to output country code for all valid input countries
"""

mapped_codes = {}
for country_code in country_codes:
try:
mapped_codes[country_code] = convert_country_code(
country_code, output=output
)
except LookupError:
print(f"Skipping country/region {country_code} in annual energy balances")
continue
return mapped_codes
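
A hedged sketch of the flag-based variant the reviewer suggests above, with raising as the default; the `ignore_invalid` parameter name (and the function name) are hypothetical, not part of this PR:

```python
def convert_valid_countries_strict(
    country_codes: list, output: str = "alpha3", ignore_invalid: bool = False
) -> dict:
    """As convert_valid_countries, but raise on invalid input unless told otherwise."""
    mapped_codes = {}
    for country_code in country_codes:
        try:
            mapped_codes[country_code] = convert_country_code(country_code, output=output)
        except LookupError:
            if not ignore_invalid:
                raise  # explicit opt-in required to skip invalid codes
            print(f"Skipping country/region {country_code}")
    return mapped_codes
```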


def rename_and_groupby(
da: xr.DataArray,
rename_dict: dict,
dim_name: str,
new_dim_name: str = None,
dropna: bool = False,
keep_non_renamed: bool = False,
) -> xr.DataArray:
"""
Take an xarray dataarray and rename the contents of a given dimension
as well as (optionally) rename that dimension.
If renaming the contents has some overlap (e.g. {'foo' : 'A', 'bar': 'A'})
then the returned dataarray will be grouped over the new dimension items
(by summing the data).

Args:
da (xr.DataArray):
Input dataarray with the dimension "dim_name".
rename_dict (dict):
Dictionary to map items in the dimension "dim_name" to new names ({"old_item_name": "new_item_name"}).
dim_name (str):
Dimension on which to rename items.
new_dim_name (str, optional): Defaults to None.
If not None, rename the dimension "dim_name" to the given string.
dropna (bool, optional): Defaults to False.
If True, drop any items in "dim_name" after renaming/grouping which have all NaN values along all other dimensions.
keep_non_renamed (bool, optional): Defaults to False.
If False, any item in "dim_name" that is not referred to in "rename_dict" will be removed from that dimension in the returned array.
Returns:
(xr.DataArray): Same as "da" but with the items in "dim_name" renamed, possibly grouped, and with "dim_name" itself possibly renamed.
"""
rename_series = pd.Series(rename_dict).rename_axis(index=dim_name)
if keep_non_renamed is True:
existing_dim_items = da[dim_name].to_series()
rename_series = rename_series.reindex(existing_dim_items).fillna(
existing_dim_items
)

if new_dim_name is None:
new_dim_name = f"_{dim_name}" # placeholder that we'll revert
revert_dim_name = True
else:
revert_dim_name = False

rename_da = xr.DataArray(rename_series.rename(new_dim_name))
da = (
da.reindex({dim_name: rename_da[dim_name]})
.groupby(rename_da)
.sum(dim_name, skipna=True, min_count=1, keep_attrs=True)
)
if revert_dim_name:
da = da.rename({new_dim_name: dim_name})
new_dim_name = dim_name
if dropna:
da = da.dropna(new_dim_name, how="all")
return da
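
A minimal usage sketch of the grouping behaviour described in the docstring, on hypothetical data (assuming the function above is in scope):

```python
import xarray as xr

da = xr.DataArray(
    [[1.0, 2.0], [3.0, 4.0]],
    coords={"country": ["foo", "bar"], "year": [2000, 2001]},
    dims=["country", "year"],
)
# "foo" and "bar" both map to "A", so their values are summed per year
result = rename_and_groupby(da, {"foo": "A", "bar": "A"}, dim_name="country")
# result.sel(country="A").values -> array([4., 6.])
```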


def ktoe_to_twh(array):
"""Convert KTOE to TWH"""
return array * 1.163e-2


def gwh_to_tj(array):
"""Convert GWh to TJ"""
return array * 3.6


def pj_to_twh(array):
"""Convert PJ to TWh"""
return array / 3.6
@@ -63,8 +154,3 @@ def pj_to_twh(array):
def tj_to_twh(array):
"""Convert TJ to TWh"""
return pj_to_twh(array) / 1000


def gwh_to_tj(array):
"""Convert GWh to TJ"""
return array * 3.6
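
As a quick sanity check of these conversion factors (1 TWh = 3.6 PJ = 3600 TJ = 3600 GWh ≈ 85.98 ktoe; a sketch assuming the functions above are in scope, not code from this PR):

```python
assert pj_to_twh(3.6) == 1.0                  # 3.6 PJ = 1 TWh
assert abs(tj_to_twh(3600) - 1.0) < 1e-12     # 3600 TJ = 1 TWh, via pj_to_twh
assert abs(gwh_to_tj(1000) - 3600.0) < 1e-9   # 1000 GWh = 3600 TJ
assert abs(ktoe_to_twh(85.98) - 1.0) < 1e-3   # 1 TWh ≈ 85.98 ktoe
```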
47 changes: 0 additions & 47 deletions rules/data.smk
@@ -43,50 +43,3 @@ rule annual_energy_balances:
first_year = 2000
conda: "../envs/default.yaml"
script: "../scripts/data/annual_energy_balance.py"


"Rules regarding JRC-IDEES Data:"


rule download_jrc_idees_zipped:
message: "Download JRC IDEES zip file for {wildcards.country_code}"
params: url = config["data-sources"]["jrc-idees"]
output: protected("data/automatic/jrc-idees/{country_code}.zip")
conda: "../envs/shell.yaml"
localrule: True
shell: "curl -sSLo {output} '{params.url}'"


def jrc_to_euro_calliope_sector(sector: str):
if sector == "transport":
return "Transport"
elif sector == "heat":
return "Tertiary"
else:
raise ValueError(f"Unknown sector {sector}.")


rule jrc_idees_unzipped:
message: "Unzip all JRC-IDEES {wildcards.sector} sector country data"
input:
"data/automatic/jrc-idees/{country_code}.zip"
params:
file_name = lambda wildcards: f"JRC-IDEES-2015_{jrc_to_euro_calliope_sector(wildcards.sector)}_{wildcards.country_code}.xlsx"
wildcard_constraints:
sector = "transport|heat"
output: temp("build/data/jrc-idees/{sector}/unprocessed/{country_code}.xlsx")
conda: "../envs/shell.yaml"
shadow: "minimal"
localrule: True
shell: """
unzip -j {input} -d build/data/jrc-idees/{wildcards.sector}/unprocessed/
mv build/data/jrc-idees/{wildcards.sector}/unprocessed/{params.file_name} {output}
"""


"EU28 county codes used for downloading JRC-IDEES"
JRC_IDEES_SCOPE = [
"AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL", "ES", "FI", "FR",
"HR", "HU", "IE", "IT", "LT", "LU", "LV", "MT", "NL", "PL", "PT", "RO",
"SE", "SI", "SK", "UK"
]
2 changes: 1 addition & 1 deletion rules/heat.smk
@@ -3,7 +3,7 @@ rule jrc_idees_heat_processed:
input:
data = expand(
"build/data/jrc-idees/heat/unprocessed/{country_code}.xlsx",
country_code=JRC_IDEES_SCOPE
country_code=JRC_IDEES_SPATIAL_SCOPE
)
output: "build/data/jrc-idees/heat/commercial/processed.csv"
conda: "../envs/default.yaml"
47 changes: 47 additions & 0 deletions rules/jrc-idees.smk
@@ -0,0 +1,47 @@
"Rules regarding JRC-IDEES Data"

JRC_IDEES_SPATIAL_SCOPE = [

Contributor:
Nice-to-have:
Should we consider moving this to the configuration?
As it is, data will always be downloaded for all JRC countries even if it is unused (such as in the minimal configuration).

Member Author:
No, you need data from some countries to fill in neighbours, so even if they're not in the list of model countries we need to pull in and process all data.

Member Author:
I've now combined main countries and infill countries into one list and only unzip and process that list of countries. It's a bit of a proof of concept and might be too verbose to be worth keeping.

Contributor:
Thank you!
And agreed: if this does not work well with the rest of the workflow, I suppose it's fine to just do all countries (unless the files are huge).

"AT", "BE", "BG", "CY", "CZ", "DE", "DK", "EE", "EL", "ES", "FI", "FR",
"HR", "HU", "IE", "IT", "LT", "LU", "LV", "MT", "NL", "PL", "PT", "RO",
"SE", "SI", "SK", "UK"
]


rule download_jrc_idees_zipped:
message: "Download JRC IDEES zip file for {wildcards.country_code}"
params: url = config["data-sources"]["jrc-idees"]
output: protected("data/automatic/jrc-idees/{country_code}.zip")
conda: "../envs/shell.yaml"
localrule: True
shell: "curl -sSLo {output} '{params.url}'"


rule jrc_idees_unzipped:
message: "Unzip all JRC-IDEES {wildcards.sector} sector country data"
input:
countries = [
f"data/automatic/jrc-idees/{country_code}.zip"
for country_code in [
pycountry.countries.lookup(country).alpha_2 for country in config["scope"]["spatial"]["countries"]
]
if country_code in JRC_IDEES_SPATIAL_SCOPE
]
params: sector_title_case = lambda wildcards: wildcards.sector.title()
wildcard_constraints:
sector = "((industry)|(transport)|(tertiary))"
output: temp(directory("build/data/jrc-idees/{sector}/unprocessed"))

Member:
I'd prefer not to use directory as an output. This is discouraged by Snakemake and it makes the code harder to understand. Without running the code, I have no idea what's happening here.

Member Author:
hmm, ok.

Member Author (@brynpickering, Apr 15, 2024):
The problem is that the get_countries_to_unzip(config) helper now needs referencing in three different snakefiles. It also requires referencing the config options config["data-pre-processing"]["fill-missing-values"]["jrc-idees"] and config["scope"]["spatial"]["countries"] in each rule so that changes in those config items trigger a re-run. We could either not filter the countries to process, avoiding this helper function and its associated params, or filter but use a directory output so that everything only needs referencing once.

Member Author:
I'm outputting to a folder rather than wildcarded individual files because it makes other references to this data lighter (heat, transport, etc. only need to reference the directory). It also seems sensible to do all the unzipping in one go into a temporary directory: it is a very quick step and all the files get deleted as soon as the downstream processing is complete. The rule is much simpler when not trying to filter countries (see earlier commits).
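
For context, a hypothetical sketch of what a get_countries_to_unzip(config) helper along these lines could look like, using only the config keys named in the comment above (this is not code from the PR):

```python
import pycountry

def get_countries_to_unzip(config: dict) -> list:
    """Model-scope countries plus those used to infill missing data, limited to JRC-IDEES coverage."""
    countries = set(config["scope"]["spatial"]["countries"])
    # each fill-missing-values entry maps a country to the countries whose data fill its gaps
    for fillers in config["data-pre-processing"]["fill-missing-values"]["jrc-idees"].values():
        countries.update(fillers)
    alpha2 = {pycountry.countries.lookup(country).alpha_2 for country in countries}
    # note: pycountry yields "GB"/"GR" where JRC-IDEES uses "UK"/"EL", so a real
    # helper would also need the Eurostat-style code mapping from eurocalliopelib.utils
    return sorted(alpha2 & set(JRC_IDEES_SPATIAL_SCOPE))
```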

conda: "../envs/shell.yaml"
shell: "unzip 'data/automatic/jrc-idees/*.zip' '*{params.sector_title_case}*' -d {output}"



rule jrc_idees_industry_processed:
message: "Process {wildcards.dataset} industry data from JRC-IDEES to be used in understanding current and future industry demand"
input:
unprocessed_data = "build/data/jrc-idees/industry/unprocessed"
output: "build/data/jrc-idees/industry/processed-{dataset}.nc"
wildcard_constraints:
dataset = "((energy)|(production))"
conda: "../envs/default.yaml"
threads: config["max-threads"]
script: "../scripts/jrc-idees/industry.py"
2 changes: 1 addition & 1 deletion rules/transport.smk
@@ -16,7 +16,7 @@ rule jrc_idees_transport_processed:
input:
data = expand(
"build/data/jrc-idees/transport/unprocessed/{country_code}.xlsx",
country_code=JRC_IDEES_SCOPE
country_code=JRC_IDEES_SPATIAL_SCOPE
)
output: "build/data/jrc-idees/transport/processed-{dataset}.csv"
params: