Skip to content

Commit

Permalink
Merge pull request #1616 from opendatacube/skip_broken_envvar
Browse files Browse the repository at this point in the history
Add config options for `dc.load`
  • Loading branch information
Ariana-B committed Jul 25, 2024
2 parents b55953d + e4d97dd commit c3cd3a8
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 9 deletions.
11 changes: 10 additions & 1 deletion datacube/api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def load(self,
resolution: int | float | tuple[int | float, int | float] | Resolution | None = None,
resampling: Resampling | dict[str, Resampling] | None = None,
align: XY[float] | Iterable[float] | None = None,
skip_broken_datasets: bool = False,
skip_broken_datasets: bool | None = None,
dask_chunks: dict[str, str | int] | None = None,
like: GeoBox | xarray.Dataset | xarray.DataArray | None = None,
fuse_func: FuserFunction | Mapping[str, FuserFunction | None] | None = None,
Expand Down Expand Up @@ -423,6 +423,7 @@ def load(self,
:param bool skip_broken_datasets:
Optional. If this is True, then don't break when failing to load a broken dataset.
If None, the value will come from the environment variable of the same name.
Default is False.
:param dict dask_chunks:
Expand Down Expand Up @@ -475,6 +476,7 @@ def filter_jan(dataset): return dataset.time.begin.month == 1
:param limit:
Optional. If provided, limit the maximum number of datasets returned. Useful for testing and debugging.
Can also be provided via the ``dc_load_limit`` config option.
:param driver:
Optional. If provided, use the specified driver to load the data.
Expand All @@ -494,6 +496,9 @@ def filter_jan(dataset): return dataset.time.begin.month == 1

if datasets is None:
assert product is not None # For type checker
if limit is None:
# check if a value was provided via the envvar
limit = self.index.environment["dc_load_limit"]
datasets = self.find_datasets(ensure_location=True,
dataset_predicate=dataset_predicate, like=like,
limit=limit,
Expand Down Expand Up @@ -547,6 +552,10 @@ def filter_jan(dataset): return dataset.time.begin.month == 1

measurement_dicts = datacube_product.lookup_measurements(measurements)

if skip_broken_datasets is None:
# default to value from env var, which defaults to False
skip_broken_datasets = self.index.environment["skip_broken_datasets"]

result = self.load_data(grouped, geobox,
measurement_dicts,
resampling=resampling,
Expand Down
6 changes: 4 additions & 2 deletions datacube/cfg/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from .cfg import find_config, parse_text
from .exceptions import ConfigException
from .opt import ODCOptionHandler, AliasOptionHandler, IndexDriverOptionHandler
from .opt import ODCOptionHandler, AliasOptionHandler, IndexDriverOptionHandler, BoolOptionHandler, IntOptionHandler
from .utils import ConfigDict, check_valid_env_name


Expand Down Expand Up @@ -272,7 +272,9 @@ def __init__(self,

self._option_handlers: list[ODCOptionHandler] = [
AliasOptionHandler("alias", self),
IndexDriverOptionHandler("index_driver", self, default="default")
IndexDriverOptionHandler("index_driver", self, default="default"),
BoolOptionHandler("skip_broken_datasets", self, default=False),
IntOptionHandler("dc_load_limit", self, minval=0),
]

def get_all_aliases(self):
Expand Down
30 changes: 24 additions & 6 deletions datacube/cfg/opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,23 +90,25 @@ def get_val_from_environment(self) -> str | None:
2. Check canonical envvar name first. E.g. option "bar" in environment "foo" -> $ODC_FOO_BAR
3. Check canonical envvar name for any alias environments that point to this one.
4. Check any legacy envvar names, and raise warnings if found.
5. Check global envvar name, denoted by "all" instead of an environment name
:return: First environment variable with non-empty value, or None if none found.
"""
if self.allow_envvar_lookup and self.env._allow_envvar_overrides:
canonical_name = f"odc_{self.env._name}_{self.name}".upper()
for env_name in self.env.get_all_aliases():
envvar_name = f"odc_{env_name}_{self.name}".upper()
val = os.environ.get(envvar_name)
if val:
if val := os.environ.get(envvar_name):
return val
for env_name in self.legacy_env_aliases:
val = os.environ.get(env_name)
if val:
for envvar_name in self.legacy_env_aliases:
if val := os.environ.get(envvar_name):
warnings.warn(
f"Config being passed in by legacy environment variable ${env_name}. "
f"Config being passed in by legacy environment variable ${envvar_name}. "
f"Please use ${canonical_name} instead.")
return val
global_name = f"odc_all_{self.name}".upper()
if val := os.environ.get(global_name):
return val
return None


Expand Down Expand Up @@ -157,6 +159,8 @@ def __init__(self, *args, minval: int | None = None, maxval: int | None = None,
def validate_and_normalise(self, value: Any) -> Any:
# Call super() to get handle default value
value = super().validate_and_normalise(value)
if value is None:
return value
try:
ival = int(value)
except ValueError:
Expand All @@ -168,6 +172,20 @@ def validate_and_normalise(self, value: Any) -> Any:
return ival


class BoolOptionHandler(ODCOptionHandler):
    """
    Option handler for a boolean-valued config option.

    Normalisation rules:
    - an actual ``bool`` passes through unchanged;
    - the string ``"true"`` (any letter case) maps to ``True``;
    - anything else (including ``None`` and other strings) maps to ``False``.
    """
    def validate_and_normalise(self, value: Any) -> Any:
        # Let the base handler apply defaults and generic validation first.
        value = super().validate_and_normalise(value)
        if isinstance(value, bool):
            return value
        # Only the (case-insensitive) string "true" is truthy; every other
        # value — None, other strings, numbers — normalises to False.
        return isinstance(value, str) and value.lower() == "true"


class IAMAuthenticationOptionHandler(ODCOptionHandler):
"""
A simple boolean, compatible with the historic behaviour of the IAM Authentication on/off option.
Expand Down
2 changes: 2 additions & 0 deletions datacube/cfg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def check_valid_env_name(name: str) -> None:
"""
if not re.fullmatch(r"^[a-z][a-z0-9]*$", name):
raise ConfigException(f'Environment names must consist of only lower case letters and numbers: {name}')
if name.lower() == "all":
raise ConfigException('Environments cannot be named "ALL"')


def check_valid_option(name: str) -> None:
Expand Down
2 changes: 2 additions & 0 deletions datacube/scripts/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ def echo_field(name, value):
echo_field('Index Driver', cfg_env.index_driver)
db_url = psql_url_from_config(cfg_env)
echo_field('Database URL:', db_url)
echo_field('Skip broken datasets:', cfg_env.skip_broken_datasets)
echo_field('Datacube load limit:', cfg_env.dc_load_limit)

echo()
echo('Valid connection:\t', nl=False)
Expand Down
2 changes: 2 additions & 0 deletions docs/about/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ v1.9.next
=========

- Permissions management cleanup in postgis driver. (:pull:`1613`)
- Add `skip_broken_datasets` and `dc_load_limit` config options. (:pull:`1616`)
- Enable global environment variables with `ODC_ALL` naming convention. (:pull:`1616`)

v1.9.0-rc9 (3rd July 2024)
==========================
Expand Down
2 changes: 2 additions & 0 deletions integration_tests/test_config_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ def test_config_check(clirunner, index, cfg_env):

assert cfg_env['db_hostname'] in result.output
assert cfg_env['db_username'] in result.output
assert str(cfg_env['skip_broken_datasets']) in result.output
assert str(cfg_env['dc_load_limit']) in result.output


def test_list_users_does_not_fail(clirunner, cfg_env, index):
Expand Down

0 comments on commit c3cd3a8

Please sign in to comment.