Skip to content

Commit

Permalink
added time_shift argument to deriver and data handler so that daily d…
Browse files Browse the repository at this point in the history
…ata time index can be shifted to start at the beginning of the day instead of at noon. GCM output frequently stamps daily data at noon instead of at the beginning of the day. This caused the solar module to think that the given GAN data had 48 time steps, since the time index had two unique day values, even though there were only 24 time steps from noon to noon on each day.
  • Loading branch information
bnb32 committed Sep 4, 2024
1 parent 7e90cb0 commit c21ed3c
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 9 deletions.
12 changes: 6 additions & 6 deletions sup3r/bias/bias_calc_vortex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
import dask
import numpy as np
import pandas as pd
import xarray as xr
from rex import Resource
from scipy.interpolate import interp1d

from sup3r.postprocessing import OutputHandler, RexOutputs
from sup3r.utilities import VERSION_RECORD
from sup3r.utilities.utilities import xr_open_mfdataset

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -114,7 +114,7 @@ def convert_month_height_tif(self, month, height):
os.remove(outfile)

if not os.path.exists(outfile) or self.overwrite:
ds = xr_open_mfdataset(infile)
ds = xr.open_mfdataset(infile)
ds = ds.rename(
{
'band_data': f'windspeed_{height}m',
Expand Down Expand Up @@ -142,7 +142,7 @@ def convert_all_tifs(self):
def mask(self):
"""Mask coordinates without data"""
if self._mask is None:
with xr_open_mfdataset(self.get_height_files('January')) as res:
with xr.open_mfdataset(self.get_height_files('January')) as res:
mask = (res[self.in_features[0]] != -999) & (
~np.isnan(res[self.in_features[0]])
)
Expand Down Expand Up @@ -173,13 +173,13 @@ def get_month(self, month):

if os.path.exists(month_file) and not self.overwrite:
logger.info(f'Loading month_file {month_file}.')
data = xr_open_mfdataset(month_file)
data = xr.open_mfdataset(month_file)
else:
logger.info(
'Getting mean windspeed for all heights '
f'({self.in_heights}) for {month}'
)
data = xr_open_mfdataset(self.get_height_files(month))
data = xr.open_mfdataset(self.get_height_files(month))
logger.info(
'Interpolating windspeed for all heights '
f'({self.out_heights}) for {month}.'
Expand Down Expand Up @@ -239,7 +239,7 @@ def interp(self, data):

def get_lat_lon(self):
"""Get lat lon grid"""
with xr_open_mfdataset(self.get_height_files('January')) as res:
with xr.open_mfdataset(self.get_height_files('January')) as res:
lons, lats = np.meshgrid(
res['longitude'].values, res['latitude'].values
)
Expand Down
9 changes: 8 additions & 1 deletion sup3r/preprocessing/data_handlers/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
time_slice: Union[slice, tuple, list, None] = slice(None),
threshold: Optional[float] = None,
time_roll: int = 0,
time_shift: Optional[int] = None,
hr_spatial_coarsen: int = 1,
nan_method_kwargs: Optional[dict] = None,
BaseLoader: Optional[Callable] = None,
Expand Down Expand Up @@ -91,8 +92,13 @@ def __init__(
are more than this value away from the target lat/lon, an error is
raised.
time_roll : int
Number of steps to shift the time axis. `Passed to
Number of steps to roll along the time axis. Passed to
`xr.Dataset.roll()`
time_shift : int | None
Number of minutes to shift time axis. This can be used, for
example, to shift the time index for daily data so that the time
stamp for a given day starts at the zeroth minute instead of at
noon, as is the case for most GCM data.
hr_spatial_coarsen : int
Spatial coarsening factor. Passed to `xr.Dataset.coarsen()`
nan_method_kwargs : str | dict | None
Expand Down Expand Up @@ -145,6 +151,7 @@ def __init__(
data=self.rasterizer.data,
features=features,
time_roll=time_roll,
time_shift=time_shift,
hr_spatial_coarsen=hr_spatial_coarsen,
nan_method_kwargs=nan_method_kwargs,
FeatureRegistry=FeatureRegistry,
Expand Down
14 changes: 13 additions & 1 deletion sup3r/preprocessing/derivers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ def __init__(
data: Union[Sup3rX, Sup3rDataset],
features,
time_roll=0,
time_shift=None,
hr_spatial_coarsen=1,
nan_method_kwargs=None,
FeatureRegistry=None,
Expand All @@ -332,8 +333,13 @@ def __init__(
features: list
List of features to derive
time_roll: int
Number of steps to shift the time axis. `Passed to
Number of steps to roll along the time axis. Passed to
`xr.Dataset.roll()`
time_shift: int | None
Number of minutes to shift time axis. This can be used, for
example, to shift the time index for daily data so that the time
stamp for a given day starts at the zeroth minute instead of at
noon, as is the case for most GCM data.
hr_spatial_coarsen: int
Spatial coarsening factor. Passed to `xr.Dataset.coarsen()`
nan_method_kwargs: str | dict | None
Expand All @@ -358,6 +364,12 @@ def __init__(
logger.debug('Applying time_roll=%s to data array', time_roll)
self.data = self.data.roll(**{Dimension.TIME: time_roll})

if time_shift is not None:
logger.debug('Applying time_shift=%s to time index', time_shift)
self.data.time_index = self.data.time_index.shift(
time_shift, freq='min'
)

if hr_spatial_coarsen > 1:
logger.debug(
'Applying hr_spatial_coarsen=%s to data.', hr_spatial_coarsen
Expand Down
6 changes: 5 additions & 1 deletion sup3r/solar/solar_cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
"""sup3r solar CLI entry points."""
"""sup3r solar CLI entry points.
TODO: This should be modified to enable distribution of file groups across
nodes instead of requesting a node for a single file
"""
import copy
import logging
import os
Expand Down

0 comments on commit c21ed3c

Please sign in to comment.