From 4808a6c83a712a12a4824530a2f1980b6c2f5118 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 31 Jul 2023 18:22:56 +0200 Subject: [PATCH 01/26] Deprecate "transient_asym_att_x", "transient_att_x" in favor of trans_att The x locations along the x-dimension where there is something, e.g. a connector, that introduces asymmetrical attenuation --- src/dtscalibration/datastore.py | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index ae5c92b0..f11d5194 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -783,8 +783,8 @@ def check_deprecated_kwargs(self, kwargs): ds.tmpw.plot() """ - list_of_depr = ["st_label", "ast_label", "rst_label", "rast_label"] - list_of_pending_depr = ["transient_asym_att_x", "transient_att_x"] + list_of_depr = ["st_label", "ast_label", "rst_label", "rast_label", "transient_asym_att_x", "transient_att_x"] + list_of_pending_depr = [] kwargs = {k: v for k, v in kwargs.items() if k not in list_of_pending_depr} @@ -1671,23 +1671,6 @@ def set_trans_att(self, trans_att=None, **kwargs): If multiple locations are defined, the losses are added. """ - - if "transient_att_x" in kwargs: - warnings.warn( - "transient_att_x argument will be deprecated in version 2, " - "use trans_att", - DeprecationWarning, - ) - trans_att = kwargs["transient_att_x"] - - if "transient_asym_att_x" in kwargs: - warnings.warn( - "transient_asym_att_x arg will be deprecated in version 2, " - "use trans_att", - DeprecationWarning, - ) - trans_att = kwargs["transient_asym_att_x"] - if "trans_att" in self.coords and self.trans_att.size > 0: raise_warning = 0 @@ -1859,8 +1842,6 @@ def calibration_single_ended( has three items. The first two items are the slices of the sections that are matched. The third item is a boolean and is True if the two sections have a reverse direction ("J-configuration"). - transient_att_x, transient_asym_att_x : iterable, optional - Depreciated. See trans_att trans_att : iterable, optional Splices can cause jumps in differential attenuation. Normal single ended calibration assumes these are not present. An additional loss @@ -2408,8 +2389,6 @@ def calibration_double_ended( memory, is faster, and gives the same result as the statsmodels solver. The statsmodels solver is mostly used to check the sparse solver. `'stats'` is the default. - transient_att_x, transient_asym_att_x : iterable, optional - Depreciated. See trans_att trans_att : iterable, optional Splices can cause jumps in differential attenuation. Normal single ended calibration assumes these are not present. 
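For orientation, a minimal sketch of a call with the renamed keyword, reusing the illustrative locations and noise variances from the synthetic test that is updated later in this series; `ds.sections` is assumed to have been set beforehand:

    # previously: ds.calibration_single_ended(..., transient_att_x=[40, 60])
    ds.calibration_single_ended(
        st_var=1.0,
        ast_var=1.0,
        method="wls",
        solver="sparse",
        trans_att=[40, 60],  # x locations along the fiber of e.g. connectors that add a loss
    )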
An additional loss From cd1c2215f2566ac4098b162ce715956d727fa82b Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 31 Jul 2023 18:38:02 +0200 Subject: [PATCH 02/26] Deprecate transient_att_x from tests in favor of trans_att --- tests/test_dtscalibration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dtscalibration.py b/tests/test_dtscalibration.py index c297633d..6235b4e8 100644 --- a/tests/test_dtscalibration.py +++ b/tests/test_dtscalibration.py @@ -1458,7 +1458,7 @@ def test_double_ended_asymmetrical_attenuation(): rast_var=1.0, method="wls", solver="sparse", - transient_asym_att_x=[50.0], + trans_att=[50.0], ) assert_almost_equal_verbose(temp_real_celsius, ds.tmpf.values, decimal=7) @@ -3149,7 +3149,7 @@ def test_single_ended_trans_att_synthetic(): st_var=1.0, ast_var=1.0, method="wls", - transient_att_x=[40, 60], + trans_att=[40, 60], solver="sparse", ) From 701f59946854be4f0a1cda27cff5d2dcbfc902ea Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 31 Jul 2023 18:46:56 +0200 Subject: [PATCH 03/26] black formatting --- src/dtscalibration/datastore.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index f11d5194..769147a9 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -783,7 +783,14 @@ def check_deprecated_kwargs(self, kwargs): ds.tmpw.plot() """ - list_of_depr = ["st_label", "ast_label", "rst_label", "rast_label", "transient_asym_att_x", "transient_att_x"] + list_of_depr = [ + "st_label", + "ast_label", + "rst_label", + "rast_label", + "transient_asym_att_x", + "transient_att_x", + ] list_of_pending_depr = [] kwargs = {k: v for k, v in kwargs.items() if k not in list_of_pending_depr} From 3ba2b20c5f218723d15a2039f535b1b60f0d4c86 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Mon, 31 Jul 2023 20:40:54 +0200 Subject: [PATCH 04/26] Inconsistent ds.sections behavior Closes #138 --- src/dtscalibration/datastore.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 769147a9..ba46989c 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -979,10 +979,8 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr dtscalibration/python-dts-calibration/blob/main/examples/notebooks/\ 04Calculate_variance_Stokes.ipynb>`_ """ - if sections: - self.sections = sections - else: - assert self.sections, "sections are not defined" + if sections is None: + sections = self.sections assert self[st_label].dims[0] == "x", "Stokes are transposed" @@ -1155,10 +1153,8 @@ def variance_stokes_exponential( dtscalibration/python-dts-calibration/blob/main/examples/notebooks/\ 04Calculate_variance_Stokes.ipynb>`_ """ - if sections: - self.sections = sections - else: - assert self.sections, "sections are not defined" + if sections is None: + sections = self.sections assert self[st_label].dims[0] == "x", "Stokes are transposed" @@ -1410,10 +1406,8 @@ def variance_stokes_linear( """ import matplotlib.pyplot as plt - if sections: - self.sections = sections - else: - assert self.sections, "sections are not defined" + if sections is None: + sections = self.sections assert self[st_label].dims[0] == "x", "Stokes are transposed" _, resid = self.variance_stokes(st_label=st_label) From 78c6aa7cdfffd5448a61dc4a6c304b913248799e Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 3 Aug 
2023 19:18:20 +0200 Subject: [PATCH 05/26] Removing api directory from docs as Reference contains the same info --- docs/api/dtscalibration.DataStore.rst | 83 ------------------- docs/api/dtscalibration.check_dims.rst | 6 -- ...dtscalibration.check_timestep_allclose.rst | 6 -- .../dtscalibration.get_netcdf_encoding.rst | 6 -- .../api/dtscalibration.merge_double_ended.rst | 6 -- docs/api/dtscalibration.open_datastore.rst | 6 -- docs/api/dtscalibration.open_mf_datastore.rst | 6 -- docs/api/dtscalibration.plot_accuracy.rst | 6 -- ...n.plot_location_residuals_double_ended.rst | 6 -- ...tion.plot_residuals_reference_sections.rst | 6 -- ...ot_residuals_reference_sections_single.rst | 6 -- docs/api/dtscalibration.plot_sigma_report.rst | 6 -- .../dtscalibration.read_apsensing_files.rst | 6 -- .../dtscalibration.read_sensornet_files.rst | 6 -- .../dtscalibration.read_sensortran_files.rst | 6 -- docs/api/dtscalibration.read_silixa_files.rst | 6 -- .../api/dtscalibration.shift_double_ended.rst | 6 -- ...ation.suggest_cable_shift_double_ended.rst | 6 -- docs/conf.py | 1 + docs/index.rst | 2 +- docs/requirements.txt | 6 -- pyproject.toml | 3 + 22 files changed, 5 insertions(+), 192 deletions(-) delete mode 100644 docs/api/dtscalibration.DataStore.rst delete mode 100644 docs/api/dtscalibration.check_dims.rst delete mode 100644 docs/api/dtscalibration.check_timestep_allclose.rst delete mode 100644 docs/api/dtscalibration.get_netcdf_encoding.rst delete mode 100644 docs/api/dtscalibration.merge_double_ended.rst delete mode 100644 docs/api/dtscalibration.open_datastore.rst delete mode 100644 docs/api/dtscalibration.open_mf_datastore.rst delete mode 100644 docs/api/dtscalibration.plot_accuracy.rst delete mode 100644 docs/api/dtscalibration.plot_location_residuals_double_ended.rst delete mode 100644 docs/api/dtscalibration.plot_residuals_reference_sections.rst delete mode 100644 docs/api/dtscalibration.plot_residuals_reference_sections_single.rst delete mode 100644 docs/api/dtscalibration.plot_sigma_report.rst delete mode 100644 docs/api/dtscalibration.read_apsensing_files.rst delete mode 100644 docs/api/dtscalibration.read_sensornet_files.rst delete mode 100644 docs/api/dtscalibration.read_sensortran_files.rst delete mode 100644 docs/api/dtscalibration.read_silixa_files.rst delete mode 100644 docs/api/dtscalibration.shift_double_ended.rst delete mode 100644 docs/api/dtscalibration.suggest_cable_shift_double_ended.rst diff --git a/docs/api/dtscalibration.DataStore.rst b/docs/api/dtscalibration.DataStore.rst deleted file mode 100644 index 102209b0..00000000 --- a/docs/api/dtscalibration.DataStore.rst +++ /dev/null @@ -1,83 +0,0 @@ -DataStore -========= - -.. currentmodule:: dtscalibration - -.. autoclass:: DataStore - :show-inheritance: - - .. rubric:: Attributes Summary - - .. autosummary:: - - ~DataStore.channel_configuration - ~DataStore.chbw - ~DataStore.chfw - ~DataStore.is_double_ended - ~DataStore.sections - ~DataStore.timeseries_keys - - .. rubric:: Methods Summary - - .. 
autosummary:: - - ~DataStore.average_double_ended - ~DataStore.average_single_ended - ~DataStore.calibration_double_ended - ~DataStore.calibration_single_ended - ~DataStore.check_deprecated_kwargs - ~DataStore.conf_int_double_ended - ~DataStore.conf_int_single_ended - ~DataStore.get_default_encoding - ~DataStore.get_section_indices - ~DataStore.get_time_dim - ~DataStore.i_var - ~DataStore.in_confidence_interval - ~DataStore.inverse_variance_weighted_mean - ~DataStore.inverse_variance_weighted_mean_array - ~DataStore.rename_labels - ~DataStore.resample_datastore - ~DataStore.temperature_residuals - ~DataStore.to_mf_netcdf - ~DataStore.to_netcdf - ~DataStore.ufunc_per_section - ~DataStore.variance_stokes - ~DataStore.variance_stokes_constant - ~DataStore.variance_stokes_exponential - ~DataStore.variance_stokes_linear - - .. rubric:: Attributes Documentation - - .. autoattribute:: channel_configuration - .. autoattribute:: chbw - .. autoattribute:: chfw - .. autoattribute:: is_double_ended - .. autoattribute:: sections - .. autoattribute:: timeseries_keys - - .. rubric:: Methods Documentation - - .. automethod:: average_double_ended - .. automethod:: average_single_ended - .. automethod:: calibration_double_ended - .. automethod:: calibration_single_ended - .. automethod:: check_deprecated_kwargs - .. automethod:: conf_int_double_ended - .. automethod:: conf_int_single_ended - .. automethod:: get_default_encoding - .. automethod:: get_section_indices - .. automethod:: get_time_dim - .. automethod:: i_var - .. automethod:: in_confidence_interval - .. automethod:: inverse_variance_weighted_mean - .. automethod:: inverse_variance_weighted_mean_array - .. automethod:: rename_labels - .. automethod:: resample_datastore - .. automethod:: temperature_residuals - .. automethod:: to_mf_netcdf - .. automethod:: to_netcdf - .. automethod:: ufunc_per_section - .. automethod:: variance_stokes - .. automethod:: variance_stokes_constant - .. automethod:: variance_stokes_exponential - .. automethod:: variance_stokes_linear diff --git a/docs/api/dtscalibration.check_dims.rst b/docs/api/dtscalibration.check_dims.rst deleted file mode 100644 index 1dc68e7e..00000000 --- a/docs/api/dtscalibration.check_dims.rst +++ /dev/null @@ -1,6 +0,0 @@ -check_dims -========== - -.. currentmodule:: dtscalibration - -.. autofunction:: check_dims diff --git a/docs/api/dtscalibration.check_timestep_allclose.rst b/docs/api/dtscalibration.check_timestep_allclose.rst deleted file mode 100644 index d655bfe4..00000000 --- a/docs/api/dtscalibration.check_timestep_allclose.rst +++ /dev/null @@ -1,6 +0,0 @@ -check_timestep_allclose -======================= - -.. currentmodule:: dtscalibration - -.. autofunction:: check_timestep_allclose diff --git a/docs/api/dtscalibration.get_netcdf_encoding.rst b/docs/api/dtscalibration.get_netcdf_encoding.rst deleted file mode 100644 index 820b0a0f..00000000 --- a/docs/api/dtscalibration.get_netcdf_encoding.rst +++ /dev/null @@ -1,6 +0,0 @@ -get_netcdf_encoding -=================== - -.. currentmodule:: dtscalibration - -.. autofunction:: get_netcdf_encoding diff --git a/docs/api/dtscalibration.merge_double_ended.rst b/docs/api/dtscalibration.merge_double_ended.rst deleted file mode 100644 index c8e512b5..00000000 --- a/docs/api/dtscalibration.merge_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -merge_double_ended -================== - -.. currentmodule:: dtscalibration - -.. 
autofunction:: merge_double_ended diff --git a/docs/api/dtscalibration.open_datastore.rst b/docs/api/dtscalibration.open_datastore.rst deleted file mode 100644 index 95a987bd..00000000 --- a/docs/api/dtscalibration.open_datastore.rst +++ /dev/null @@ -1,6 +0,0 @@ -open_datastore -============== - -.. currentmodule:: dtscalibration - -.. autofunction:: open_datastore diff --git a/docs/api/dtscalibration.open_mf_datastore.rst b/docs/api/dtscalibration.open_mf_datastore.rst deleted file mode 100644 index edab4ebe..00000000 --- a/docs/api/dtscalibration.open_mf_datastore.rst +++ /dev/null @@ -1,6 +0,0 @@ -open_mf_datastore -================= - -.. currentmodule:: dtscalibration - -.. autofunction:: open_mf_datastore diff --git a/docs/api/dtscalibration.plot_accuracy.rst b/docs/api/dtscalibration.plot_accuracy.rst deleted file mode 100644 index a2e41fc2..00000000 --- a/docs/api/dtscalibration.plot_accuracy.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_accuracy -============= - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_accuracy diff --git a/docs/api/dtscalibration.plot_location_residuals_double_ended.rst b/docs/api/dtscalibration.plot_location_residuals_double_ended.rst deleted file mode 100644 index ad0d16db..00000000 --- a/docs/api/dtscalibration.plot_location_residuals_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_location_residuals_double_ended -==================================== - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_location_residuals_double_ended diff --git a/docs/api/dtscalibration.plot_residuals_reference_sections.rst b/docs/api/dtscalibration.plot_residuals_reference_sections.rst deleted file mode 100644 index 45f2529b..00000000 --- a/docs/api/dtscalibration.plot_residuals_reference_sections.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_residuals_reference_sections -================================= - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_residuals_reference_sections diff --git a/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst b/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst deleted file mode 100644 index 725d77a0..00000000 --- a/docs/api/dtscalibration.plot_residuals_reference_sections_single.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_residuals_reference_sections_single -======================================== - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_residuals_reference_sections_single diff --git a/docs/api/dtscalibration.plot_sigma_report.rst b/docs/api/dtscalibration.plot_sigma_report.rst deleted file mode 100644 index b047cdeb..00000000 --- a/docs/api/dtscalibration.plot_sigma_report.rst +++ /dev/null @@ -1,6 +0,0 @@ -plot_sigma_report -================= - -.. currentmodule:: dtscalibration - -.. autofunction:: plot_sigma_report diff --git a/docs/api/dtscalibration.read_apsensing_files.rst b/docs/api/dtscalibration.read_apsensing_files.rst deleted file mode 100644 index 04bd67d6..00000000 --- a/docs/api/dtscalibration.read_apsensing_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_apsensing_files -==================== - -.. currentmodule:: dtscalibration - -.. autofunction:: read_apsensing_files diff --git a/docs/api/dtscalibration.read_sensornet_files.rst b/docs/api/dtscalibration.read_sensornet_files.rst deleted file mode 100644 index 541f00cb..00000000 --- a/docs/api/dtscalibration.read_sensornet_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_sensornet_files -==================== - -.. currentmodule:: dtscalibration - -.. 
autofunction:: read_sensornet_files diff --git a/docs/api/dtscalibration.read_sensortran_files.rst b/docs/api/dtscalibration.read_sensortran_files.rst deleted file mode 100644 index 35d92c94..00000000 --- a/docs/api/dtscalibration.read_sensortran_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_sensortran_files -===================== - -.. currentmodule:: dtscalibration - -.. autofunction:: read_sensortran_files diff --git a/docs/api/dtscalibration.read_silixa_files.rst b/docs/api/dtscalibration.read_silixa_files.rst deleted file mode 100644 index d5adee72..00000000 --- a/docs/api/dtscalibration.read_silixa_files.rst +++ /dev/null @@ -1,6 +0,0 @@ -read_silixa_files -================= - -.. currentmodule:: dtscalibration - -.. autofunction:: read_silixa_files diff --git a/docs/api/dtscalibration.shift_double_ended.rst b/docs/api/dtscalibration.shift_double_ended.rst deleted file mode 100644 index 0a879ea1..00000000 --- a/docs/api/dtscalibration.shift_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -shift_double_ended -================== - -.. currentmodule:: dtscalibration - -.. autofunction:: shift_double_ended diff --git a/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst b/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst deleted file mode 100644 index 14a135c0..00000000 --- a/docs/api/dtscalibration.suggest_cable_shift_double_ended.rst +++ /dev/null @@ -1,6 +0,0 @@ -suggest_cable_shift_double_ended -================================ - -.. currentmodule:: dtscalibration - -.. autofunction:: suggest_cable_shift_double_ended diff --git a/docs/conf.py b/docs/conf.py index b58fdee4..f1073bf8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,6 +4,7 @@ extensions = [ + 'sphinx_rtd_theme', "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.coverage", diff --git a/docs/index.rst b/docs/index.rst index 1cbfeed1..40616de8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ Contents ======== .. 
toctree:: - :maxdepth: 2 + :maxdepth: 4 readme installation diff --git a/docs/requirements.txt b/docs/requirements.txt index 8a51afe0..142b6ca3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1 @@ -IPython -matplotlib>=3.0.0 -nbsphinx -recommonmark -sphinx<6 -sphinx-automodapi -e .[docs] diff --git a/pyproject.toml b/pyproject.toml index 1fdadfda..7b05a985 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,10 +83,13 @@ dev = [ "nbformat", # Needed to run the tests ] docs = [ # Required for ReadTheDocs + "IPython", "myst_parser", + "matplotlib>=3.0.0", "sphinx", "sphinx_rtd_theme", "sphinx-autoapi", + "sphinx-automodapi", "coverage[toml]", "nbsphinx", "ipykernel", From 8c295a793049f18243de41e70dbf3841a0306b55 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 3 Aug 2023 20:22:39 +0200 Subject: [PATCH 06/26] Clean up of documentation --- docs/index.rst | 1 - docs/notebooks/07Calibrate_single_ended.ipynb | 10 ++++---- docs/notebooks/08Calibrate_double_ended.ipynb | 12 +++++----- ...Temperature_uncertainty_single_ended.ipynb | 2 +- docs/reference/index.rst | 2 +- docs/usage.rst | 7 ------ src/dtscalibration/datastore.py | 23 ++++++++----------- 7 files changed, 23 insertions(+), 34 deletions(-) delete mode 100644 docs/usage.rst diff --git a/docs/index.rst b/docs/index.rst index 40616de8..a26299aa 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,6 @@ Contents readme installation - usage learn_by_examples reference/index contributing diff --git a/docs/notebooks/07Calibrate_single_ended.ipynb b/docs/notebooks/07Calibrate_single_ended.ipynb index a130ac63..28616d9e 100644 --- a/docs/notebooks/07Calibrate_single_ended.ipynb +++ b/docs/notebooks/07Calibrate_single_ended.ipynb @@ -52,7 +52,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Read the raw data files loaded from your DTS machine\n", + "## Read the raw data files loaded from your DTS machine\n", "Use `read_silixa_files` for reading files from a Silixa device. The following functions are available for reading files from other devices: `read_sensortran_files`, `read_apsensing_files`, and `read_sensornet_files`. See Notebook 1.\n", "\n", "Calibration is performed on sections that have a known" @@ -79,7 +79,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Define the reference fiber sections that have a known temperature\n", + "## Define the reference fiber sections that have a known temperature\n", "As explained in Notebook 3. DTS devices come with temperature probes to measure the temperature of the water baths. These measurements are stored in the data that was loaded in the previous step and are loaded automatically. In the case you would like to use an external temperature sensor, have a look at notebook `09Import_timeseries` to append those measurements to the `ds` before continuing with the calibration. " ] }, @@ -100,7 +100,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Estimate the variance of the noise in the Stokes and anti-Stokes measurements\n", + "## Estimate the variance of the noise in the Stokes and anti-Stokes measurements\n", "First calculate the variance of the noise in the measured Stokes and anti-Stokes signals. See Notebook 4 for more information.\n", "\n", "The Stokes and anti-Stokes signals should follow a smooth decaying exponential. This function fits a decaying exponential to each reference section for each time step. 
The variance of the residuals between the measured Stokes and anti-Stokes signals and the fitted signals is used as an estimate of the variance in measured signals. This algorithm assumes that the temperature is the same for the entire section but may vary over time and differ per section." @@ -143,7 +143,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Perform calibration and compute the temperature\n", + "## Perform calibration and compute the temperature\n", "We calibrate the measurements and their uncertainty with a single method call. The temperature is stored by default as the `ds.tmpf` dataarray and the variance of its approximation as `ds.tmpf_var`." ] }, @@ -167,7 +167,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Plot the temperature and uncertainty of the estimated temperature\n", + "## Plot the temperature and uncertainty of the estimated temperature\n", "First, the temperature for the entire fiber is plotted. Second, the temperature and its standard error are plotted for the first timestep." ] }, diff --git a/docs/notebooks/08Calibrate_double_ended.ipynb b/docs/notebooks/08Calibrate_double_ended.ipynb index c76fcdf4..2ed0f35b 100644 --- a/docs/notebooks/08Calibrate_double_ended.ipynb +++ b/docs/notebooks/08Calibrate_double_ended.ipynb @@ -56,7 +56,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Read the raw data files loaded from your DTS machine\n", + "## Read the raw data files loaded from your DTS machine\n", "Use `read_silixa_files` for reading files from a Silixa device. The following functions are available for reading files from other devices: `read_sensortran_files`, `read_apsensing_files`, and `read_sensornet_files`. See Notebook 1. If your DTS device was configured such that your forward- and backward measurements are stored in seperate folders have a look at `11Merge_single_measurements_into_double` example notebook. " ] }, @@ -77,7 +77,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Align measurements of the forward and backward channels" + "## Align measurements of the forward and backward channels" ] }, { @@ -147,7 +147,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Define the reference fiber sections that have a known temperature\n", + "## Define the reference fiber sections that have a known temperature\n", "As explained in Notebook 3. As explained in Notebook 3. DTS devices come with temperature probes to measure the temperature of the water baths. These measurements are stored in the data that was loaded in the previous step and are loaded automatically. In the case you would like to use an external temperature sensor, have a look at notebook `09Import_timeseries` to append those measurements to the `ds` before continuing with the calibration." ] }, @@ -167,7 +167,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Estimate the variance of the noise in the Stokes and anti-Stokes measurements\n", + "## Estimate the variance of the noise in the Stokes and anti-Stokes measurements\n", "First calculate the variance in the measured Stokes and anti-Stokes signals, in the forward and backward direction. See Notebook 4 for more information.\n", "\n", "The Stokes and anti-Stokes signals should follow a smooth decaying exponential. This function fits a decaying exponential to each reference section for each time step. The variance of the residuals between the measured Stokes and anti-Stokes signals and the fitted signals is used as an estimate of the variance in measured signals. 
This algorithm assumes that the temperature is the same for the entire section but may vary over time and differ per section." @@ -219,7 +219,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Perform calibration and compute the temperature\n", + "## Perform calibration and compute the temperature\n", "We calibrate the measurements with a single method call. Three temperatures are estimated for double-ended setups. The temperature using the Stokes and anti-Stokes meassurements of the forward channel, `tmpf`. The temperature of the backward channel, `tmpb`. And the weigthed average of the two, `tmpw`. The latter is the best estimate of the temperature along the fiber with the smallest uncertainty." ] }, @@ -264,7 +264,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Uncertainty of the calibrated temperature\n", + "## Uncertainty of the calibrated temperature\n", "The uncertainty of the calibrated temperature can be computed in two manners:\n", "1. The variance of the calibrated temperature can be approximated using linear error propagation\n", " - Very fast computation\n", diff --git a/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb b/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb index 7fd9000d..f270404f 100644 --- a/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb +++ b/docs/notebooks/17Temperature_uncertainty_single_ended.ipynb @@ -5,7 +5,7 @@ "id": "bcd29771", "metadata": {}, "source": [ - "# Uncertainty of the temperature estimated using single-ended calibration\n", + "# 17. Uncertainty of the temperature estimated using single-ended calibration\n", "After comleting single-ended calibration, you might be interested in inspecting the uncertainty of the estimated temperature.\n", "- Decomposing the uncertainty\n", "- Monte Carlo estimate of the standard error\n", diff --git a/docs/reference/index.rst b/docs/reference/index.rst index b73574c7..74c3bbc1 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -5,5 +5,5 @@ Reference :glob: .. automodapi:: dtscalibration - :no-inheritance-diagram: :skip: plot_dask + :members: diff --git a/docs/usage.rst b/docs/usage.rst deleted file mode 100644 index 7b73d5bf..00000000 --- a/docs/usage.rst +++ /dev/null @@ -1,7 +0,0 @@ -===== -Usage -===== - -To use dtscalibration in a project:: - - import dtscalibration diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index ba46989c..eb895c10 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -5753,7 +5753,9 @@ def open_datastore( load_in_memory=False, **kwargs, ): - """Load and decode a datastore from a file or file-like object. + """Load and decode a datastore from a file or file-like object. Most + arguments are passed to xarray.open_dataset(). + Parameters ---------- filename_or_obj : str, Path, file or xarray.backends.*DataStore @@ -5926,7 +5928,7 @@ def read_silixa_files( load_in_memory="auto", **kwargs, ): - """Read a folder with measurement files. Each measurement file contains + """Read a folder with measurement files from a device of the Silixa brand. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -6004,9 +6006,9 @@ def read_silixa_files( def read_sensortran_files(directory, timezone_netcdf="UTC", silent=False, **kwargs): - """Read a folder with measurement files. Each measurement file contains - values for a - single timestep. Remember to check which timezone you are working in. 
+ """Read a folder with measurement files from a device of the Sensortran + brand. Each measurement file contains values for a single timestep. Remember + to check which timezone you are working in. The sensortran files are already timezone aware @@ -6066,14 +6068,13 @@ def read_apsensing_files( directory=None, file_ext="*.xml", timezone_netcdf="UTC", - timezone_input_files="UTC", silent=False, load_in_memory="auto", **kwargs, ): - """Read a folder with measurement files. Each measurement file contains - values for a single timestep. Remember to check which timezone - you are working in. + """Read a folder with measurement files from a device of the Sensortran + brand. Each measurement file contains values for a single timestep. + Remember to check which timezone you are working in. Parameters ---------- @@ -6083,10 +6084,6 @@ def read_apsensing_files( Path to folder timezone_netcdf : str, optional Timezone string of the netcdf file. UTC follows CF-conventions. - timezone_input_files : str, optional - Timezone string of the measurement files. - Remember to check when measurements are taken. - Also if summertime is used. file_ext : str, optional file extension of the measurement files silent : bool From fd57fd3a166c06c5e8ed916cb1f745fa92d7caad Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 3 Aug 2023 20:33:43 +0200 Subject: [PATCH 07/26] Another cleanup of docs --- docs/index.rst | 2 +- docs/reference/index.rst | 1 + src/dtscalibration/datastore.py | 3 +-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index a26299aa..f184b6f8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ Contents ======== .. toctree:: - :maxdepth: 4 + :maxdepth: 3 readme installation diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 74c3bbc1..9d9e4821 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -7,3 +7,4 @@ Reference .. automodapi:: dtscalibration :skip: plot_dask :members: + :no-inheritance-diagram: diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index eb895c10..97eca919 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -46,7 +46,7 @@ class DataStore(xr.Dataset): """The data class that stores the measurements, contains calibration methods to relate Stokes and anti-Stokes to temperature. The user should - never initiate this class directly, but use read_xml_dir or open_datastore + never initiate this class directly, but use open_datastore functions instead. Parameters @@ -85,7 +85,6 @@ class DataStore(xr.Dataset): See Also -------- - dtscalibration.read_xml_dir : Load measurements stored in XML-files dtscalibration.open_datastore : Load (calibrated) measurements from netCDF-like file """ From e28bb88558db81edacd5bdb5e0dd7762ad863d6a Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Thu, 3 Aug 2023 20:48:15 +0200 Subject: [PATCH 08/26] Cleanup docs --- src/dtscalibration/plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dtscalibration/plot.py b/src/dtscalibration/plot.py index cab55180..27a747e4 100755 --- a/src/dtscalibration/plot.py +++ b/src/dtscalibration/plot.py @@ -511,8 +511,8 @@ def plot_accuracy( def plot_sigma_report( ds, temp_label, temp_var_acc_label, temp_var_prec_label=None, itimes=None ): - """ - Returns two sub-plots. first a temperature with confidence boundaries. + """Returns two sub-plots. first a temperature with confidence boundaries. 
+ Parameters ---------- ds From fcbf028ea3da85bc3d71f75254b206d4cfaf7278 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 12:03:52 +0200 Subject: [PATCH 09/26] Update headings example nb 14 --- docs/notebooks/14Lossy_splices.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/14Lossy_splices.ipynb b/docs/notebooks/14Lossy_splices.ipynb index b7959031..c2c98e6e 100644 --- a/docs/notebooks/14Lossy_splices.ipynb +++ b/docs/notebooks/14Lossy_splices.ipynb @@ -24,7 +24,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Demonstration\n", + "## Demonstration\n", "To demonstrate the effect of a lossy splice, we'll load the same dataset that was used in previous notebooks, and modify the data to simulate a lossy splice." ] }, @@ -230,5 +230,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 3b5afa78094d5c1a5e7e33337e30b49653a8f00e Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 12:10:54 +0200 Subject: [PATCH 10/26] Updated changelog to include the fixing the headers of the example notebooks --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6dcffe82..5ad48eb7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Added Fixed * Single-ended measurements with `fix_alpha` failed due to a bug introduced in v2.0.0 ([#173](https://github.com/dtscalibration/python-dts-calibration/pull/173)). +* Fixes headers in example notebooks and their appearance in the docs Changed From 26d795f54eef9d4c77f2fe314768b3ecd2a2d50d Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 15:53:58 +0200 Subject: [PATCH 11/26] Black formatted docs/conf.py --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index f1073bf8..5e49338e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,7 +4,7 @@ extensions = [ - 'sphinx_rtd_theme', + "sphinx_rtd_theme", "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.coverage", From dd6d21c0683e2fd24b96c1626fdd6d253edbe05d Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 17:43:00 +0200 Subject: [PATCH 12/26] Improves consistency of ds.sections behavior Closes #138 --- src/dtscalibration/datastore.py | 196 +++++++++++++++++++------------- src/dtscalibration/io.py | 8 +- tests/test_dtscalibration.py | 4 +- 3 files changed, 124 insertions(+), 84 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 97eca919..f156461c 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -218,71 +218,75 @@ def sections(self): """ if "_sections" not in self.attrs: self.attrs["_sections"] = yaml.dump(None) + return yaml.load(self.attrs["_sections"], Loader=yaml.UnsafeLoader) - @sections.setter - def sections(self, sections: Dict[str, List[slice]]): - sections_fix_slice_fixed = None + def validate_sections(self, sections: Dict[str, List[slice]]): + assert isinstance(sections, dict) + + # be less restrictive for capitalized labels + # find lower cases label + labels = np.reshape( + [[s.lower(), s] for s in self.data_vars.keys()], (-1,) + ).tolist() + + sections_fix = dict() + for k, v in sections.items(): + if k.lower() in labels: + i_lower_case = labels.index(k.lower()) + i_normal_case = i_lower_case + 1 + k_normal_case = labels[i_normal_case] + sections_fix[k_normal_case] = v + else: + assert k in self.data_vars, ( + "The keys of the " + "sections-dictionary should " + 
"refer to a valid timeserie " + "already stored in " + "ds.data_vars " + ) - if sections: - assert isinstance(sections, dict) - - # be less restrictive for capitalized labels - # find lower cases label - labels = np.reshape( - [[s.lower(), s] for s in self.data_vars.keys()], (-1,) - ).tolist() - - sections_fix = dict() - for k, v in sections.items(): - if k.lower() in labels: - i_lower_case = labels.index(k.lower()) - i_normal_case = i_lower_case + 1 - k_normal_case = labels[i_normal_case] - sections_fix[k_normal_case] = v - else: - assert k in self.data_vars, ( - "The keys of the " - "sections-dictionary should " - "refer to a valid timeserie " - "already stored in " - "ds.data_vars " - ) + sections_fix_slice_fixed = dict() - sections_fix_slice_fixed = dict() + for k, v in sections_fix.items(): + assert isinstance(v, (list, tuple)), ( + "The values of the sections-dictionary " + "should be lists of slice objects." + ) - for k, v in sections_fix.items(): - assert isinstance(v, (list, tuple)), ( - "The values of the sections-dictionary " - "should be lists of slice objects." + for vi in v: + assert isinstance(vi, slice), ( + "The values of the sections-dictionary should " + "be lists of slice objects." ) - for vi in v: - assert isinstance(vi, slice), ( - "The values of the sections-dictionary should " - "be lists of slice objects." - ) + assert self.x.sel(x=vi).size > 0, ( + f"Better define the {k} section. You tried {vi}, " + "which is not within the x-dimension" + ) - assert self.x.sel(x=vi).size > 0, ( - f"Better define the {k} section. You tried {vi}, " - "which is out of reach" - ) + # sorted stretches + stretch_unsort = [slice(float(vi.start), float(vi.stop)) for vi in v] + stretch_start = [i.start for i in stretch_unsort] + stretch_i_sorted = np.argsort(stretch_start) + sections_fix_slice_fixed[k] = [stretch_unsort[i] for i in stretch_i_sorted] - # sorted stretches - stretch_unsort = [slice(float(vi.start), float(vi.stop)) for vi in v] - stretch_start = [i.start for i in stretch_unsort] - stretch_i_sorted = np.argsort(stretch_start) - sections_fix_slice_fixed[k] = [ - stretch_unsort[i] for i in stretch_i_sorted - ] + # Prevent overlapping slices + ix_sec = self.ufunc_per_section( + sections=sections_fix_slice_fixed, x_indices=True, calc_per="all" + ) + assert np.unique(ix_sec).size == ix_sec.size, "The sections are overlapping" - # Prevent overlapping slices - ix_sec = self.ufunc_per_section( - sections=sections_fix_slice_fixed, x_indices=True, calc_per="all" - ) - assert np.unique(ix_sec).size == ix_sec.size, "The sections are overlapping" + return sections_fix_slice_fixed - self.attrs["_sections"] = yaml.dump(sections_fix_slice_fixed) + @sections.setter + def sections(self, sections: Dict[str, List[slice]]): + sections_validated = None + + if sections is not None: + sections_validated = self.validate_sections(sections=sections) + + self.attrs["_sections"] = yaml.dump(sections_validated) pass @sections.deleter @@ -480,7 +484,7 @@ def to_mf_netcdf( compute=True, time_chunks_from_key="st", ): - """Write DataStore to multiple to multiple netCDF files. + """Write DataStore to multiple netCDF files. Splits the DataStore along the time dimension using the chunks. It first checks if all chunks in `ds` are time aligned. 
If this is not @@ -980,17 +984,21 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr """ if sections is None: sections = self.sections + else: + sections = self.validate_sections(sections) assert self[st_label].dims[0] == "x", "Stokes are transposed" check_timestep_allclose(self, eps=0.01) - data_dict = da.compute( - self.ufunc_per_section(label=st_label, calc_per="stretch") - )[ - 0 - ] # should maybe be per section. But then residuals + # should maybe be per section. But then residuals # seem to be correlated between stretches. I don't know why.. BdT. + data_dict = da.compute( + self.ufunc_per_section( + sections=sections, label=st_label, calc_per="stretch" + ) + )[0] + resid_list = [] for k, v in data_dict.items(): @@ -1015,7 +1023,9 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr return var_I, resid else: - ix_resid = self.ufunc_per_section(x_indices=True, calc_per="all") + ix_resid = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) resid_sorted = np.full(shape=self[st_label].shape, fill_value=np.nan) resid_sorted[ix_resid, :] = resid @@ -1154,6 +1164,8 @@ def variance_stokes_exponential( """ if sections is None: sections = self.sections + else: + sections = self.validate_sections(sections) assert self[st_label].dims[0] == "x", "Stokes are transposed" @@ -1166,7 +1178,7 @@ def variance_stokes_exponential( y_list = [] # intensities of stokes x_list = [] # length rel to start of section. for alpha - for k, stretches in self.sections.items(): + for k, stretches in sections.items(): for stretch in stretches: y_list.append(self[st_label].sel(x=stretch).data.T.reshape(-1)) _x = self.x.sel(x=stretch).data.copy() @@ -1260,6 +1272,7 @@ def variance_stokes_exponential( if not reshape_residuals: return var_I, resid + else: # restructure the residuals, such that they can be plotted and # added to ds @@ -1276,7 +1289,9 @@ def variance_stokes_exponential( resid_res.append(resid[lenis:lenie].T.reshape((nt, leni)).T) _resid = np.concatenate(resid_res) - _resid_x = self.ufunc_per_section(label="x", calc_per="all") + _resid_x = self.ufunc_per_section( + sections=sections, label="x", calc_per="all" + ) isort = np.argsort(_resid_x) resid_x = _resid_x[isort] # get indices from ufunc directly resid = _resid[isort, :] @@ -1407,11 +1422,15 @@ def variance_stokes_linear( if sections is None: sections = self.sections + else: + sections = self.validate_sections(sections) assert self[st_label].dims[0] == "x", "Stokes are transposed" - _, resid = self.variance_stokes(st_label=st_label) + _, resid = self.variance_stokes(sections=sections, st_label=st_label) - ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") + ix_sec = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) st = self.isel(x=ix_sec)[st_label].values.ravel() diff_st = resid.isel(x=ix_sec).values.ravel() @@ -1480,7 +1499,7 @@ def var_fun(stokes): return slope, offset, st_sort_mean, st_sort_var, resid, var_fun def i_var(self, st_var, ast_var, st_label="st", ast_label="ast"): - """ + r""" Compute the variance of an observation given the stokes and anti-Stokes intensities and their variance. 
The variance, :math:`\sigma^2_{I_{m,n}}`, of the distribution of the @@ -1632,6 +1651,8 @@ def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): """ if sections is None: sections = self.sections + else: + sections = self.validate_sections(sections) if conf_ints is None: conf_ints = self[ci_label].values @@ -1656,7 +1677,7 @@ def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): return np.logical_and(mask_dn, mask_up) - def set_trans_att(self, trans_att=None, **kwargs): + def set_trans_att(self, trans_att=None): """Gracefully set the locations that introduce directional differential attenuation @@ -1749,7 +1770,7 @@ def calibration_single_ended( fix_alpha=None, **kwargs, ): - """ + r""" Calibrate the Stokes (`ds.st`) and anti-Stokes (`ds.ast`) data to temperature using fiber sections with a known temperature (`ds.sections`) for single-ended setups. The calibrated temperature is @@ -1883,8 +1904,6 @@ def calibration_single_ended( if sections: self.sections = sections - else: - assert self.sections, "sections are not defined" if method == "wls": assert st_var is not None and ast_var is not None, "Set `st_var`" @@ -2232,7 +2251,7 @@ def calibration_double_ended( verbose=False, **kwargs, ): - """ + r""" See example notebook 8 for an explanation on how to use this function. Calibrate the Stokes (`ds.st`) and anti-Stokes (`ds.ast`) of the forward channel and from the backward channel (`ds.rst`, `ds.rast`) data to @@ -2439,8 +2458,6 @@ def calibration_double_ended( if sections: self.sections = sections - else: - assert self.sections, "sections are not defined" self.check_reference_section_values() @@ -3454,7 +3471,7 @@ def conf_int_single_ended( reduce_memory_usage=False, **kwargs, ): - """ + r""" Estimation of the confidence intervals for the temperatures measured with a single-ended setup. It consists of five steps. First, the variances of the Stokes and anti-Stokes intensity measurements are estimated @@ -4076,7 +4093,7 @@ def conf_int_double_ended( reduce_memory_usage=False, **kwargs, ): - """ + r""" Estimation of the confidence intervals for the temperatures measured with a double-ended setup. Double-ended setups require four additional steps to estimate the @@ -4138,6 +4155,8 @@ def conf_int_double_ended( confidence interval for :math:`T_{m,n}` are estimated with the 2.5\% and 97.5\% percentiles of :math:`T_{m,n,i}`. + Assumes sections are set. + Parameters ---------- p_val : array-like, optional @@ -5124,6 +5143,11 @@ def temperature_residuals(self, label=None, sections=None): resid_da : xarray.DataArray The residuals as DataArray """ + if sections is None: + sections = self.sections + else: + sections = self.validate_sections(sections) + resid_temp = self.ufunc_per_section( sections=sections, label=label, temp_err=True, calc_per="all" ) @@ -5390,7 +5414,8 @@ def func(a): return out def resample_datastore(*args, **kwargs): - raise "ds.resample_datastore() is deprecated. Use " "from dtscalibration import DataStore; DataStore(ds.resample()) " "instead. See example notebook 2." + raise "ds.resample_datastore() is deprecated. Use from dtscalibration import DataStore; " \ + "DataStore(ds.resample()) instead. See example notebook 2." 
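A rough illustration of the replacement pattern named in this deprecation message; the resampling interval and the mean() reducer are illustrative choices rather than part of the API:

    from dtscalibration import DataStore

    # instead of ds.resample_datastore(...):
    ds_resampled = DataStore(ds.resample(time="5min").mean())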
class ParameterIndexDoubleEnded: @@ -6004,7 +6029,7 @@ def read_silixa_files( return ds -def read_sensortran_files(directory, timezone_netcdf="UTC", silent=False, **kwargs): +def read_sensortran_files(directory, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, **kwargs): """Read a folder with measurement files from a device of the Sensortran brand. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. @@ -6015,6 +6040,10 @@ def read_sensortran_files(directory, timezone_netcdf="UTC", silent=False, **kwar ---------- directory : str, Path Path to folder containing BinaryRawDTS and BinaryTemp files + timezone_input_files : str, optional + Timezone string of the measurement files. + Remember to check when measurements are taken. + Also if summertime is used. timezone_netcdf : str, optional Timezone string of the netcdf file. UTC follows CF-conventions. silent : bool @@ -6050,6 +6079,7 @@ def read_sensortran_files(directory, timezone_netcdf="UTC", silent=False, **kwar data_vars, coords, attrs = read_sensortran_files_routine( filepathlist_dts, filepathlist_temp, + timezone_input_files=timezone_input_files, timezone_netcdf=timezone_netcdf, silent=silent, ) @@ -6066,6 +6096,7 @@ def read_apsensing_files( filepathlist=None, directory=None, file_ext="*.xml", + timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, load_in_memory="auto", @@ -6081,6 +6112,10 @@ def read_apsensing_files( List of paths that point the the silixa files directory : str, Path, optional Path to folder + timezone_input_files : str, optional + Timezone string of the measurement files. + Remember to check when measurements are taken. + Also if summertime is used. timezone_netcdf : str, optional Timezone string of the netcdf file. UTC follows CF-conventions. file_ext : str, optional @@ -6135,6 +6170,7 @@ def read_apsensing_files( data_vars, coords, attrs = read_apsensing_files_routine( filepathlist, timezone_netcdf=timezone_netcdf, + timezone_input_files=timezone_input_files, silent=silent, load_in_memory=load_in_memory, ) @@ -6147,8 +6183,8 @@ def read_sensornet_files( filepathlist=None, directory=None, file_ext="*.ddf", - timezone_netcdf="UTC", timezone_input_files="UTC", + timezone_netcdf="UTC", silent=False, add_internal_fiber_length=50.0, fiber_length=None, @@ -6164,12 +6200,12 @@ def read_sensornet_files( List of paths that point the the silixa files directory : str, Path, optional Path to folder - timezone_netcdf : str, optional - Timezone string of the netcdf file. UTC follows CF-conventions. timezone_input_files : str, optional Timezone string of the measurement files. Remember to check when measurements are taken. Also if summertime is used. + timezone_netcdf : str, optional + Timezone string of the netcdf file. UTC follows CF-conventions. file_ext : str, optional file extension of the measurement files silent : bool diff --git a/src/dtscalibration/io.py b/src/dtscalibration/io.py index db10c5bd..8f9b4738 100644 --- a/src/dtscalibration/io.py +++ b/src/dtscalibration/io.py @@ -1201,7 +1201,7 @@ def metakey(meta, dict_to_parse, prefix): def read_sensortran_files_routine( - filepathlist_dts, filepathlist_temp, timezone_netcdf="UTC", silent=False + filepathlist_dts, filepathlist_temp, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False ): """ Internal routine that reads sensortran files. 
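A hedged usage sketch of the public Sensortran reader that forwards this new keyword; the directory path is made up, and both timezone arguments are shown at their defaults because the parsing routine only accepts UTC:

    from dtscalibration import read_sensortran_files

    ds = read_sensortran_files(
        directory="data/sensortran_binary",  # illustrative path
        timezone_input_files="UTC",          # Sensortran files are always recorded in UTC
        timezone_netcdf="UTC",
    )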
@@ -1220,6 +1220,7 @@ def read_sensortran_files_routine( ------- """ + assert timezone_input_files == "UTC", "The sensortran files are always in UTC time." # Obtain metadata from the first file data_dts, meta_dts = read_sensortran_single(filepathlist_dts[0]) @@ -1418,7 +1419,7 @@ def read_sensortran_single(fname): def read_apsensing_files_routine( - filepathlist, timezone_netcdf="UTC", silent=False, load_in_memory="auto" + filepathlist, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, load_in_memory="auto" ): """ Internal routine that reads AP Sensing files. @@ -1429,6 +1430,7 @@ def read_apsensing_files_routine( Parameters ---------- filepathlist + timezone_input_files timezone_netcdf silent load_in_memory @@ -1441,6 +1443,8 @@ def read_apsensing_files_routine( import dask + assert timezone_input_files == "UTC" and timezone_netcdf == "UTC", "Only UTC timezones supported" + # translate names tld = {"ST": "st", "AST": "ast", "REV-ST": "rst", "REV-AST": "rast", "TEMP": "tmp"} diff --git a/tests/test_dtscalibration.py b/tests/test_dtscalibration.py index 6235b4e8..11830b77 100644 --- a/tests/test_dtscalibration.py +++ b/tests/test_dtscalibration.py @@ -825,7 +825,7 @@ def test_variance_of_stokes_linear_synthetic(): resid, var_fun, ) = ds.variance_stokes_linear( - "c_lin_var_through_zero", nbin=10, through_zero=True, plot_fit=False + "c_lin_var_through_zero", sections=sections, nbin=10, through_zero=True, plot_fit=False ) assert_almost_equal_verbose(slope, var_slope, decimal=3) @@ -838,7 +838,7 @@ def test_variance_of_stokes_linear_synthetic(): resid, var_fun, ) = ds.variance_stokes_linear( - "c_lin_var_through_zero", nbin=100, through_zero=False + "c_lin_var_through_zero", sections=sections, nbin=100, through_zero=False ) assert_almost_equal_verbose(slope, var_slope, decimal=3) assert_almost_equal_verbose(offset, 0.0, decimal=0) From ab12189247111370f785d6c838e3d76355b3bd91 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 17:47:31 +0200 Subject: [PATCH 13/26] variance_stokes_linear() does not default through zero anymore. --- CHANGELOG.rst | 1 + src/dtscalibration/datastore.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5ad48eb7..569022b7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -19,6 +19,7 @@ Changed * Standardized parameter names. Reduced the freedom in choosing parameter names and dimension names in favor of simplifying the code. * Requiring netcdf4 >= 1.6.4 * Optional dependencies of xarray that improve performance are now required by default. +* variance_stokes_linear() does not default through zero anymore. * Flox included in requirements to speed up resampling via xarray ([Xarray #5734](https://github.com/pydata/xarray/pull/5734)). 
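For the variance_stokes_linear() default change noted above, a minimal sketch of keeping the previous through-zero fit explicitly; the Stokes label, bin count, and `sections` mapping are placeholders:

    slope, offset, st_sort_mean, st_sort_var, resid, var_fun = ds.variance_stokes_linear(
        "st",               # label of the Stokes observations
        sections=sections,  # reference sections with known temperature
        nbin=50,
        through_zero=True,  # was the default before this change
    )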
Removed diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index f156461c..bad8c89b 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -1305,7 +1305,7 @@ def variance_stokes_exponential( return var_I, resid_da def variance_stokes_linear( - self, st_label, sections=None, nbin=50, through_zero=True, plot_fit=False + self, st_label, sections=None, nbin=50, through_zero=False, plot_fit=False ): """ Approximate the variance of the noise in Stokes intensity measurements @@ -1457,6 +1457,7 @@ def variance_stokes_linear( # VAR(Stokes) = slope * Stokes offset = 0.0 slope = np.linalg.lstsq(st_sort_mean[:, None], st_sort_var, rcond=None)[0] + else: # VAR(Stokes) = slope * Stokes + offset slope, offset = np.linalg.lstsq( @@ -1472,7 +1473,9 @@ def variance_stokes_linear( f"not possible. Most likely, your {st_label} do " f"not vary enough to fit a linear curve. Either " f"use `through_zero` option or use " - f"`ds.variance_stokes_constant()`" + f"`ds.variance_stokes_constant()`. Another reason " + f"could be that your sections are defined to be " + f"wider than they actually are." ) def var_fun(stokes): From b58afef3386b34994317db47707630152ab72d41 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 17:50:28 +0200 Subject: [PATCH 14/26] black formatting --- src/dtscalibration/datastore.py | 7 ++++--- src/dtscalibration/io.py | 16 +++++++++++++--- tests/test_dtscalibration.py | 6 +++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index bad8c89b..dc59e634 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -5417,8 +5417,7 @@ def func(a): return out def resample_datastore(*args, **kwargs): - raise "ds.resample_datastore() is deprecated. Use from dtscalibration import DataStore; " \ - "DataStore(ds.resample()) instead. See example notebook 2." + raise "ds.resample_datastore() is deprecated. Use from dtscalibration import DataStore; " "DataStore(ds.resample()) instead. See example notebook 2." class ParameterIndexDoubleEnded: @@ -6032,7 +6031,9 @@ def read_silixa_files( return ds -def read_sensortran_files(directory, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, **kwargs): +def read_sensortran_files( + directory, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, **kwargs +): """Read a folder with measurement files from a device of the Sensortran brand. Each measurement file contains values for a single timestep. Remember to check which timezone you are working in. diff --git a/src/dtscalibration/io.py b/src/dtscalibration/io.py index 8f9b4738..e880a6ca 100644 --- a/src/dtscalibration/io.py +++ b/src/dtscalibration/io.py @@ -1201,7 +1201,11 @@ def metakey(meta, dict_to_parse, prefix): def read_sensortran_files_routine( - filepathlist_dts, filepathlist_temp, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False + filepathlist_dts, + filepathlist_temp, + timezone_input_files="UTC", + timezone_netcdf="UTC", + silent=False, ): """ Internal routine that reads sensortran files. @@ -1419,7 +1423,11 @@ def read_sensortran_single(fname): def read_apsensing_files_routine( - filepathlist, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, load_in_memory="auto" + filepathlist, + timezone_input_files="UTC", + timezone_netcdf="UTC", + silent=False, + load_in_memory="auto", ): """ Internal routine that reads AP Sensing files. 
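Because the parsing routines gained a UTC-only assertion earlier in this series, a hedged sketch of the public AP Sensing reader; the path is illustrative, and any value other than "UTC" for the timezone arguments is expected to trip that assertion:

    from dtscalibration import read_apsensing_files

    ds = read_apsensing_files(
        directory="data/ap_sensing",  # illustrative path
        timezone_input_files="UTC",   # only UTC is supported by the parsing routine
    )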
@@ -1443,7 +1451,9 @@ def read_apsensing_files_routine( import dask - assert timezone_input_files == "UTC" and timezone_netcdf == "UTC", "Only UTC timezones supported" + assert ( + timezone_input_files == "UTC" and timezone_netcdf == "UTC" + ), "Only UTC timezones supported" # translate names tld = {"ST": "st", "AST": "ast", "REV-ST": "rst", "REV-AST": "rast", "TEMP": "tmp"} diff --git a/tests/test_dtscalibration.py b/tests/test_dtscalibration.py index 11830b77..2b86aa2d 100644 --- a/tests/test_dtscalibration.py +++ b/tests/test_dtscalibration.py @@ -825,7 +825,11 @@ def test_variance_of_stokes_linear_synthetic(): resid, var_fun, ) = ds.variance_stokes_linear( - "c_lin_var_through_zero", sections=sections, nbin=10, through_zero=True, plot_fit=False + "c_lin_var_through_zero", + sections=sections, + nbin=10, + through_zero=True, + plot_fit=False, ) assert_almost_equal_verbose(slope, var_slope, decimal=3) From 0ee72916486b8e05720f700dc20eb6a5abad58a1 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 18:05:34 +0200 Subject: [PATCH 15/26] Moved validate_sections() to please mypy --- src/dtscalibration/datastore.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index dc59e634..b6dcf028 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -221,6 +221,21 @@ def sections(self): return yaml.load(self.attrs["_sections"], Loader=yaml.UnsafeLoader) + @sections.setter + def sections(self, sections: Dict[str, List[slice]]): + sections_validated = None + + if sections is not None: + sections_validated = self.validate_sections(sections=sections) + + self.attrs["_sections"] = yaml.dump(sections_validated) + pass + + @sections.deleter + def sections(self): + self.sections = None + pass + def validate_sections(self, sections: Dict[str, List[slice]]): assert isinstance(sections, dict) @@ -279,21 +294,6 @@ def validate_sections(self, sections: Dict[str, List[slice]]): return sections_fix_slice_fixed - @sections.setter - def sections(self, sections: Dict[str, List[slice]]): - sections_validated = None - - if sections is not None: - sections_validated = self.validate_sections(sections=sections) - - self.attrs["_sections"] = yaml.dump(sections_validated) - pass - - @sections.deleter - def sections(self): - self.sections = None - pass - @property def is_double_ended(self) -> float: """ From 146faf26ba2cee1ed268f0b1e31412d0b36ab6a9 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Fri, 4 Aug 2023 20:51:18 +0200 Subject: [PATCH 16/26] Renamed io.py to io_utils.py and moved read_*_files() to io.py --- src/dtscalibration/__init__.py | 14 +- src/dtscalibration/datastore.py | 581 +------- src/dtscalibration/io.py | 2316 ++++++------------------------- src/dtscalibration/io_utils.py | 2020 +++++++++++++++++++++++++++ 4 files changed, 2476 insertions(+), 2455 deletions(-) create mode 100644 src/dtscalibration/io_utils.py diff --git a/src/dtscalibration/__init__.py b/src/dtscalibration/__init__.py index 2087d2b9..3836a779 100644 --- a/src/dtscalibration/__init__.py +++ b/src/dtscalibration/__init__.py @@ -1,17 +1,17 @@ # coding=utf-8 from dtscalibration.datastore import DataStore -from dtscalibration.datastore import open_datastore -from dtscalibration.datastore import open_mf_datastore -from dtscalibration.datastore import read_apsensing_files -from dtscalibration.datastore import read_sensornet_files -from dtscalibration.datastore import 
read_sensortran_files -from dtscalibration.datastore import read_silixa_files from dtscalibration.datastore_utils import check_dims from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.datastore_utils import get_netcdf_encoding from dtscalibration.datastore_utils import merge_double_ended from dtscalibration.datastore_utils import shift_double_ended from dtscalibration.datastore_utils import suggest_cable_shift_double_ended +from dtscalibration.io import open_datastore +from dtscalibration.io import open_mf_datastore +from dtscalibration.io import read_apsensing_files +from dtscalibration.io import read_sensornet_files +from dtscalibration.io import read_sensortran_files +from dtscalibration.io import read_silixa_files from dtscalibration.plot import plot_accuracy from dtscalibration.plot import plot_location_residuals_double_ended from dtscalibration.plot import plot_residuals_reference_sections @@ -41,7 +41,7 @@ ] # filenames = ['datastore.py', 'datastore_utils.py', 'calibrate_utils.py', -# 'plot.py', 'io.py'] +# 'plot.py', 'io_utils.py'] # filenames = ['plot.py'] # # for filename in filenames: diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index b6dcf028..a1e335ba 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -24,17 +24,17 @@ from dtscalibration.calibrate_utils import wls_sparse from dtscalibration.calibrate_utils import wls_stats from dtscalibration.datastore_utils import check_timestep_allclose -from dtscalibration.io import _dim_attrs -from dtscalibration.io import apsensing_xml_version_check -from dtscalibration.io import read_apsensing_files_routine -from dtscalibration.io import read_sensornet_files_routine_v3 -from dtscalibration.io import read_sensortran_files_routine -from dtscalibration.io import read_silixa_files_routine_v4 -from dtscalibration.io import read_silixa_files_routine_v6 -from dtscalibration.io import sensornet_ddf_version_check -from dtscalibration.io import sensortran_binary_version_check -from dtscalibration.io import silixa_xml_version_check -from dtscalibration.io import ziphandle_to_filepathlist +from dtscalibration.io_utils import _dim_attrs +from dtscalibration.io_utils import apsensing_xml_version_check +from dtscalibration.io_utils import read_apsensing_files_routine +from dtscalibration.io_utils import read_sensornet_files_routine_v3 +from dtscalibration.io_utils import read_sensortran_files_routine +from dtscalibration.io_utils import read_silixa_files_routine_v4 +from dtscalibration.io_utils import read_silixa_files_routine_v6 +from dtscalibration.io_utils import sensornet_ddf_version_check +from dtscalibration.io_utils import sensortran_binary_version_check +from dtscalibration.io_utils import silixa_xml_version_check +from dtscalibration.io_utils import ziphandle_to_filepathlist dtsattr_namelist = ["double_ended_flag"] dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} @@ -982,6 +982,14 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr dtscalibration/python-dts-calibration/blob/main/examples/notebooks/\ 04Calculate_variance_Stokes.ipynb>`_ """ + + def func_fit(p, xs): + return p[:xs, None] * p[None, xs:] + + def func_cost(p, data, xs): + fit = func_fit(p, xs) + return np.sum((fit - data) ** 2) + if sections is None: sections = self.sections else: @@ -5762,554 +5770,3 @@ def get_taf_values(self, pval, x, trans_att, axis=""): return arr_out -def open_datastore( - filename_or_obj, - group=None, - 
decode_cf=True, - mask_and_scale=None, - decode_times=True, - concat_characters=True, - decode_coords=True, - engine=None, - chunks=None, - lock=None, - cache=None, - drop_variables=None, - backend_kwargs=None, - load_in_memory=False, - **kwargs, -): - """Load and decode a datastore from a file or file-like object. Most - arguments are passed to xarray.open_dataset(). - - Parameters - ---------- - filename_or_obj : str, Path, file or xarray.backends.*DataStore - Strings and Path objects are interpreted as a path to a netCDF file - or an OpenDAP URL and opened with python-netCDF4, unless the filename - ends with .gz, in which case the file is gunzipped and opened with - scipy.io.netcdf (only netCDF3 supported). File-like objects are opened - with scipy.io.netcdf (only netCDF3 supported). - group : str, optional - Path to the netCDF4 group in the given file to open (only works for - netCDF4 files). - decode_cf : bool, optional - Whether to decode these variables, assuming they were saved according - to CF conventions. - mask_and_scale : bool, optional - If True, replace array values equal to `_FillValue` with NA and scale - values according to the formula `original_values * scale_factor + - add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are - taken from variable attributes (if they exist). If the `_FillValue` or - `missing_value` attribute contains multiple values a warning will be - issued and all array values matching one of the multiple values will - be replaced by NA. mask_and_scale defaults to True except for the - pseudonetcdf backend. - decode_times : bool, optional - If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. - concat_characters : bool, optional - If True, concatenate along the last dimension of character arrays to - form string arrays. Dimensions will only be concatenated over (and - removed) if they have no corresponding variable and if they are only - used as the last dimension of character arrays. - decode_coords : bool, optional - If True, decode the 'coordinates' attribute to identify coordinates in - the resulting dataset. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', - 'pseudonetcdf'}, optional - Engine to use when reading files. If not provided, the default engine - is chosen based on available dependencies, with a preference for - 'netcdf4'. - chunks : int or dict, optional - If chunks is provided, it used to load the new dataset into dask - arrays. ``chunks={}`` loads the dataset with dask using a single - chunk for all arrays. - lock : False, True or threading.Lock, optional - If chunks is provided, this argument is passed on to - :py:func:`dask.array.from_array`. By default, a global lock is - used when reading data from netCDF files with the netcdf4 and h5netcdf - engines to avoid issues with concurrent access when using dask's - multithreaded backend. - cache : bool, optional - If True, cache data loaded from the underlying datastore in memory as - NumPy arrays when accessed to avoid reading from the underlying data- - store multiple times. Defaults to True unless you specify the `chunks` - argument to use dask, in which case it defaults to False. Does not - change the behavior of coordinates corresponding to dimensions, which - always load their data from disk into a ``pandas.Index``. - drop_variables: string or iterable, optional - A variable or list of variables to exclude from being parsed from the - dataset. 
This may be useful to drop variables with problems or - inconsistent values. - backend_kwargs: dictionary, optional - A dictionary of keyword arguments to pass on to the backend. This - may be useful when backend options would improve performance or - allow user control of dataset processing. - - Returns - ------- - dataset : Dataset - The newly created dataset. - - See Also - -------- - xarray.open_dataset - xarray.load_dataset - """ - - xr_kws = inspect.signature(xr.open_dataset).parameters.keys() - - ds_kwargs = {k: v for k, v in kwargs.items() if k not in xr_kws} - - if chunks is None: - chunks = {} - - with xr.open_dataset( - filename_or_obj, - group=group, - decode_cf=decode_cf, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - engine=engine, - chunks=chunks, - lock=lock, - cache=cache, - drop_variables=drop_variables, - backend_kwargs=backend_kwargs, - ) as ds_xr: - ds = DataStore( - data_vars=ds_xr.data_vars, - coords=ds_xr.coords, - attrs=ds_xr.attrs, - **ds_kwargs, - ) - - # to support deprecated st_labels - ds = ds.rename_labels(assertion=False) - - if load_in_memory: - if "cache" in kwargs: - raise TypeError("cache has no effect in this context") - return ds.load() - - else: - return ds - - -def open_mf_datastore( - path=None, paths=None, combine="by_coords", load_in_memory=False, **kwargs -): - """ - Open a datastore from multiple netCDF files. This script assumes the - datastore was split along the time dimension. But only variables with a - time dimension should be concatenated in the time dimension. Other - options from xarray do not support this. - - Parameters - ---------- - combine : {'by_coords', 'nested'}, optional - Leave it at by_coords - path : str - A file path to the stored netcdf files with an asterisk in the - filename to list all. Ensure you have leading zeros in the file - numbering. - paths : list - Define you own list of file paths. - Returns - ------- - dataset : Dataset - The newly created dataset. - """ - from xarray.backends.api import open_mfdataset - - if paths is None: - paths = sorted(glob.glob(path)) - assert paths, "No files match found with: " + path - - with open_mfdataset(paths=paths, combine=combine, **kwargs) as xds: - ds = DataStore(data_vars=xds.data_vars, coords=xds.coords, attrs=xds.attrs) - - # to support deprecated st_labels - ds = ds.rename_labels(assertion=False) - - if load_in_memory: - if "cache" in kwargs: - raise TypeError("cache has no effect in this context") - return ds.load() - - else: - return ds - - -def read_silixa_files( - filepathlist=None, - directory=None, - zip_handle=None, - file_ext="*.xml", - timezone_netcdf="UTC", - silent=False, - load_in_memory="auto", - **kwargs, -): - """Read a folder with measurement files from a device of the Silixa brand. Each measurement file contains - values for a - single timestep. Remember to check which timezone you are working in. - - The silixa files are already timezone aware - - Parameters - ---------- - filepathlist : list of str, optional - List of paths that point the the silixa files - directory : str, Path, optional - Path to folder - timezone_netcdf : str, optional - Timezone string of the netcdf file. UTC follows CF-conventions. 
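# --- editor's sketch (illustrative, not part of the patch) -------------------
# read_silixa_files(), removed from datastore.py here, is re-added in io.py
# below and stays importable from the package root. The directory is
# hypothetical. Passing timezone_input_files is rejected by an assertion
# because Silixa files are already timezone aware.
from dtscalibration import read_silixa_files

ds = read_silixa_files(
    directory="data/double_ended_experiment",
    file_ext="*.xml",
    timezone_netcdf="UTC",
    load_in_memory="auto",
)
# -----------------------------------------------------------------------------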
- file_ext : str, optional - file extension of the measurement files - silent : bool - If set tot True, some verbose texts are not printed to stdout/screen - load_in_memory : {'auto', True, False} - If 'auto' the Stokes data is only loaded to memory for small files - kwargs : dict-like, optional - keyword-arguments are passed to DataStore initialization - - Returns - ------- - datastore : DataStore - The newly created datastore. - """ - - assert "timezone_input_files" not in kwargs, ( - "The silixa files are " "already timezone aware" - ) - - if filepathlist is None and zip_handle is None: - filepathlist = sorted(glob.glob(os.path.join(directory, file_ext))) - - # Make sure that the list of files contains any files - assert ( - len(filepathlist) >= 1 - ), "No measurement files found in provided " "directory: \n" + str(directory) - - elif filepathlist is None and zip_handle: - filepathlist = ziphandle_to_filepathlist(fh=zip_handle, extension=file_ext) - - # Make sure that the list of files contains any files - assert len(filepathlist) >= 1, ( - "No measurement files found in provided " "list/directory" - ) - - xml_version = silixa_xml_version_check(filepathlist) - - if xml_version == 4: - data_vars, coords, attrs = read_silixa_files_routine_v4( - filepathlist, - timezone_netcdf=timezone_netcdf, - silent=silent, - load_in_memory=load_in_memory, - ) - - elif xml_version in (6, 7, 8): - data_vars, coords, attrs = read_silixa_files_routine_v6( - filepathlist, - xml_version=xml_version, - timezone_netcdf=timezone_netcdf, - silent=silent, - load_in_memory=load_in_memory, - ) - - else: - raise NotImplementedError( - "Silixa xml version " + "{0} not implemented".format(xml_version) - ) - - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) - return ds - - -def read_sensortran_files( - directory, timezone_input_files="UTC", timezone_netcdf="UTC", silent=False, **kwargs -): - """Read a folder with measurement files from a device of the Sensortran - brand. Each measurement file contains values for a single timestep. Remember - to check which timezone you are working in. - - The sensortran files are already timezone aware - - Parameters - ---------- - directory : str, Path - Path to folder containing BinaryRawDTS and BinaryTemp files - timezone_input_files : str, optional - Timezone string of the measurement files. - Remember to check when measurements are taken. - Also if summertime is used. - timezone_netcdf : str, optional - Timezone string of the netcdf file. UTC follows CF-conventions. - silent : bool - If set tot True, some verbose texts are not printed to stdout/screen - kwargs : dict-like, optional - keyword-arguments are passed to DataStore initialization - - Returns - ------- - datastore : DataStore - The newly created datastore. 
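# --- editor's sketch (illustrative, not part of the patch) -------------------
# The Sensortran reader expects paired *BinaryRawDTS.dat and *BinaryTemp.dat
# files per timestep and raises FileNotFoundError when a Temp file is missing.
# The folder name below is hypothetical.
from dtscalibration import read_sensortran_files

ds = read_sensortran_files(directory="data/sensortran_binary", timezone_netcdf="UTC")
# -----------------------------------------------------------------------------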
- """ - - filepathlist_dts = sorted(glob.glob(os.path.join(directory, "*BinaryRawDTS.dat"))) - - # Make sure that the list of files contains any files - assert ( - len(filepathlist_dts) >= 1 - ), "No RawDTS measurement files found " "in provided directory: \n" + str(directory) - - filepathlist_temp = [f.replace("RawDTS", "Temp") for f in filepathlist_dts] - - for ii, fname in enumerate(filepathlist_dts): - # Check if corresponding temperature file exists - if not os.path.isfile(filepathlist_temp[ii]): - raise FileNotFoundError( - "Could not find BinaryTemp " + "file corresponding to {}".format(fname) - ) - - version = sensortran_binary_version_check(filepathlist_dts) - - if version == 3: - data_vars, coords, attrs = read_sensortran_files_routine( - filepathlist_dts, - filepathlist_temp, - timezone_input_files=timezone_input_files, - timezone_netcdf=timezone_netcdf, - silent=silent, - ) - else: - raise NotImplementedError( - "Sensortran binary version " + "{0} not implemented".format(version) - ) - - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) - return ds - - -def read_apsensing_files( - filepathlist=None, - directory=None, - file_ext="*.xml", - timezone_input_files="UTC", - timezone_netcdf="UTC", - silent=False, - load_in_memory="auto", - **kwargs, -): - """Read a folder with measurement files from a device of the Sensortran - brand. Each measurement file contains values for a single timestep. - Remember to check which timezone you are working in. - - Parameters - ---------- - filepathlist : list of str, optional - List of paths that point the the silixa files - directory : str, Path, optional - Path to folder - timezone_input_files : str, optional - Timezone string of the measurement files. - Remember to check when measurements are taken. - Also if summertime is used. - timezone_netcdf : str, optional - Timezone string of the netcdf file. UTC follows CF-conventions. - file_ext : str, optional - file extension of the measurement files - silent : bool - If set tot True, some verbose texts are not printed to stdout/screen - load_in_memory : {'auto', True, False} - If 'auto' the Stokes data is only loaded to memory for small files - kwargs : dict-like, optional - keyword-arguments are passed to DataStore initialization - - Notes - ----- - Only XML files are supported for now - - Returns - ------- - datastore : DataStore - The newly created datastore. 
- """ - if not file_ext == "*.xml": - raise NotImplementedError("Only .xml files are supported for now") - - if filepathlist is None: - filepathlist = sorted(glob.glob(os.path.join(directory, file_ext))) - - # Make sure that the list of files contains any files - assert ( - len(filepathlist) >= 1 - ), "No measurement files found in provided " "directory: \n" + str(directory) - - # Make sure that the list of files contains any files - assert len(filepathlist) >= 1, ( - "No measurement files found in provided " "list/directory" - ) - - device = apsensing_xml_version_check(filepathlist) - - valid_devices = ["CP320"] - - if device in valid_devices: - pass - - else: - warnings.warn( - "AP sensing device " - '"{0}"'.format(device) - + " has not been tested.\nPlease open an issue on github" - + " and provide an example file" - ) - - data_vars, coords, attrs = read_apsensing_files_routine( - filepathlist, - timezone_netcdf=timezone_netcdf, - timezone_input_files=timezone_input_files, - silent=silent, - load_in_memory=load_in_memory, - ) - - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) - return ds - - -def read_sensornet_files( - filepathlist=None, - directory=None, - file_ext="*.ddf", - timezone_input_files="UTC", - timezone_netcdf="UTC", - silent=False, - add_internal_fiber_length=50.0, - fiber_length=None, - **kwargs, -): - """Read a folder with measurement files. Each measurement file contains - values for a single timestep. Remember to check which timezone - you are working in. - - Parameters - ---------- - filepathlist : list of str, optional - List of paths that point the the silixa files - directory : str, Path, optional - Path to folder - timezone_input_files : str, optional - Timezone string of the measurement files. - Remember to check when measurements are taken. - Also if summertime is used. - timezone_netcdf : str, optional - Timezone string of the netcdf file. UTC follows CF-conventions. - file_ext : str, optional - file extension of the measurement files - silent : bool - If set tot True, some verbose texts are not printed to stdout/screen - add_internal_fiber_length : float - Set to zero if only the measurements of the fiber connected to the DTS - system of interest. Set to 50 if you also want to keep the internal - reference section. - fiber_length : float - It is the fiber length between the two connector entering the DTS - device. If left to `None`, it is approximated with - `x[-1] - add_internal_fiber_length`. - kwargs : dict-like, optional - keyword-arguments are passed to DataStore initialization - - Notes - ----- - Compressed sensornet files can not be directly decoded, - because the files are encoded with encoding='windows-1252' instead of - UTF-8. - - Returns - ------- - datastore : DataStore - The newly created datastore. - """ - if filepathlist is None: - # Also look for files in sub-folders - filepathlist_unsorted = glob.glob( - os.path.join(directory, "**", file_ext), recursive=True - ) - - # Make sure that the list of files contains any files - msg = "No measurement files found in provided directory: \n" + str(directory) - assert len(filepathlist_unsorted) >= 1, msg - - # sort based on dates in filesname. 
A simple sorted() is not sufficient - # as month folders do not sort well - basenames = [os.path.basename(fp) for fp in filepathlist_unsorted] - dates = ["".join(bn.split(" ")[2:4]) for bn in basenames] - i_sort = np.argsort(dates) - filepathlist = [filepathlist_unsorted[i] for i in i_sort] - - # Check measurements are all from same channel - chno = [bn.split(" ")[1] for bn in basenames] - assert ( - len(set(chno)) == 1 - ), "Folder contains measurements from multiple channels" - - # Make sure that the list of files contains any files - assert len(filepathlist) >= 1, ( - "No measurement files found in provided " "list/directory" - ) - - ddf_version = sensornet_ddf_version_check(filepathlist) - - valid_versions = [ - "Halo DTS v1*", - "ORYX F/W v1.02 Oryx Data Collector v3*", - "ORYX F/W v4.00 Oryx Data Collector v3*", - "Sentinel DTS v5*", - ] - - valid = any([fnmatch.fnmatch(ddf_version, v_) for v_ in valid_versions]) - - if valid: - if fnmatch.fnmatch(ddf_version, "Halo DTS v1*"): - flip_reverse_measurements = True - elif fnmatch.fnmatch(ddf_version, "Sentinel DTS v5*"): - flip_reverse_measurements = True - else: - flip_reverse_measurements = False - - else: - flip_reverse_measurements = False - warnings.warn( - "Sensornet .dff version " - '"{0}"'.format(ddf_version) - + " has not been tested.\nPlease open an issue on github" - + " and provide an example file" - ) - - data_vars, coords, attrs = read_sensornet_files_routine_v3( - filepathlist, - timezone_netcdf=timezone_netcdf, - timezone_input_files=timezone_input_files, - silent=silent, - add_internal_fiber_length=add_internal_fiber_length, - fiber_length=fiber_length, - flip_reverse_measurements=flip_reverse_measurements, - ) - - ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) - return ds - - -def func_fit(p, xs): - return p[:xs, None] * p[None, xs:] - - -def func_cost(p, data, xs): - fit = func_fit(p, xs) - return np.sum((fit - data) ** 2) diff --git a/src/dtscalibration/io.py b/src/dtscalibration/io.py index e880a6ca..be7d0dd6 100644 --- a/src/dtscalibration/io.py +++ b/src/dtscalibration/io.py @@ -1,2020 +1,564 @@ -# coding=utf-8 +import fnmatch +import glob +import inspect import os -import struct -from contextlib import contextmanager -from xml.etree import ElementTree +import warnings -import dask -import dask.array as da import numpy as np -import pandas as pd - -# Returns a dictionary with the attributes to the dimensions. -# The keys refer to the naming used in the raw files. 
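# --- editor's sketch (illustrative, not part of the patch) -------------------
# The module-level helpers func_fit/func_cost removed above now live inside
# variance_stokes_constant() (see the earlier datastore.py hunk). func_fit
# builds a rank-1 outer-product model from a single parameter vector and
# func_cost is the squared-residual objective it minimises; a toy round trip:
import numpy as np

def func_fit(p, xs):
    return p[:xs, None] * p[None, xs:]

def func_cost(p, data, xs):
    fit = func_fit(p, xs)
    return np.sum((fit - data) ** 2)

p = np.array([1.0, 2.0, 3.0, 10.0, 20.0])  # 3 "x" factors and 2 "time" factors
data = func_fit(p, xs=3)                   # shape (3, 2)
assert func_cost(p, data, xs=3) == 0.0     # an exact fit has zero cost
# -----------------------------------------------------------------------------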
-# TODO: attrs for st_var and use it in parse_st_var function -_dim_attrs = { - ("x", "distance"): dict( - name="distance", - description="Length along fiber", - long_description="Starting at connector of forward channel", - units="m", - ), - ("tmp", "temperature"): dict( - name="tmp", description="Temperature calibrated by device", units=r"$^\circ$C" - ), - ("st",): dict(name="st", description="Stokes intensity", units="-"), - ("ast",): dict(name="ast", description="anti-Stokes intensity", units="-"), - ("rst",): dict(name="rst", description="reverse Stokes intensity", units="-"), - ("rast",): dict( - name="rast", description="reverse anti-Stokes intensity", units="-" - ), - ("tmpf",): dict( - name="tmpf", - description="Temperature estimated using the Stokes and anti-Stokes from the Forward channel", - units=r"$^\circ$C", - ), - ("tmpb",): dict( - name="tmpb", - description="Temperature estimated using the Stokes and anti-Stokes from the Backward channel", - units=r"$^\circ$C", - ), - ("tmpw",): dict( - name="tmpw", - description="Temperature estimated using the Stokes and anti-Stokes from both the Forward and Backward channel.", - units=r"$^\circ$C", - ), - ("tmpf_var",): dict( - name="tmpf_var", - description="Uncertainty variance in tmpf estimated with linear-error propagation", - units=r"$^\circ$C$^2$", - ), - ("tmpb_var",): dict( - name="tmpb_var", - description="Uncertainty variance in tmpb estimated with linear-error propagation", - units=r"$^\circ$C$^2$", - ), - ("tmpw_var",): dict( - name="tmpw_var", - description="Uncertainty variance in tmpw estimated with linear-error propagation", - units=r"$^\circ$C$^2$", - ), - ("tmpw_var_lower",): dict( - name="tmpw_var_lower", - description="Lower bound of uncertainty variance in tmpw estimated with linear-error propagation. " - "Excludes the parameter uncertainties.", - units=r"$^\circ$C$^2$", - ), - ("tmpw_var_approx",): dict( - name="tmpw_var_upper", - description="Upper bound of uncertainty variance in tmpw estimated with linear-error propagation. 
" - "Excludes the correlation between tmpf and tmpb caused by alpha.", - units=r"$^\circ$C$^2$", - ), - ("tmpf_mc_var",): dict( - name="tmpf_mc_var", - description="Uncertainty variance in tmpf estimated with Monte Carlo", - units=r"$^\circ$C$^2$", - ), - ("tmpb_mc_var",): dict( - name="tmpb_mc_var", - description="Uncertainty variance in tmpb estimated with Monte Carlo", - units=r"$^\circ$C$^2$", - ), - ("tmpw_mc_var",): dict( - name="tmpw_mc_var", - description="Uncertainty variance in tmpw estimated with Monte Carlo", - units=r"$^\circ$C$^2$", - ), - ("acquisitionTime",): dict( - name="acquisitionTime", - description="Measurement duration of forward channel", - long_description="Actual measurement duration of forward " "channel", - units="seconds", - ), - ("userAcquisitionTimeFW",): dict( - name="userAcquisitionTimeFW", - description="Measurement duration of forward channel", - long_description="Desired measurement duration of forward " "channel", - units="seconds", - ), - ("userAcquisitionTimeBW",): dict( - name="userAcquisitionTimeBW", - description="Measurement duration of backward channel", - long_description="Desired measurement duration of backward " "channel", - units="seconds", - ), - ("trans_att",): dict( - name="Locations introducing transient directional differential " "attenuation", - description="Locations along the x-dimension introducing transient " - "directional differential attenuation", - long_description="Connectors introduce additional differential " - "attenuation that is different for the forward " - "and backward direction, and varies over time.", - units="m", - ), -} - -# Because variations in the names exist between the different file formats. The -# tuple as key contains the possible keys, which is expanded below. -dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} - -dim_attrs_apsensing = dict(dim_attrs) -dim_attrs_apsensing["TEMP"] = dim_attrs_apsensing.pop("tmp") -dim_attrs_apsensing["TEMP"]["name"] = "TEMP" -dim_attrs_apsensing.pop("acquisitionTime") -dim_attrs_apsensing.pop("userAcquisitionTimeFW") -dim_attrs_apsensing.pop("userAcquisitionTimeBW") - - -@contextmanager -def open_file(path, **kwargs): - if isinstance(path, tuple): - # print('\nabout to open zipfile', path[0], '. from', path[1]) - # assert isinstance(path[1], zip) - the_file = path[1].open(path[0], **kwargs) - - else: - the_file = open(path, **kwargs) - - yield the_file - the_file.close() - - -def silixa_xml_version_check(filepathlist): - """Function which tests which version of xml files have to be read. 
+import xarray as xr + +from dtscalibration import DataStore +from dtscalibration.io_utils import apsensing_xml_version_check +from dtscalibration.io_utils import read_apsensing_files_routine +from dtscalibration.io_utils import read_sensornet_files_routine_v3 +from dtscalibration.io_utils import read_sensortran_files_routine +from dtscalibration.io_utils import read_silixa_files_routine_v4 +from dtscalibration.io_utils import read_silixa_files_routine_v6 +from dtscalibration.io_utils import sensornet_ddf_version_check +from dtscalibration.io_utils import sensortran_binary_version_check +from dtscalibration.io_utils import silixa_xml_version_check +from dtscalibration.io_utils import ziphandle_to_filepathlist + + +def open_datastore( + filename_or_obj, + group=None, + decode_cf=True, + mask_and_scale=None, + decode_times=True, + concat_characters=True, + decode_coords=True, + engine=None, + chunks=None, + lock=None, + cache=None, + drop_variables=None, + backend_kwargs=None, + load_in_memory=False, + **kwargs, +): + """Load and decode a datastore from a file or file-like object. Most + arguments are passed to xarray.open_dataset(). Parameters ---------- - filepathlist + filename_or_obj : str, Path, file or xarray.backends.*DataStore + Strings and Path objects are interpreted as a path to a netCDF file + or an OpenDAP URL and opened with python-netCDF4, unless the filename + ends with .gz, in which case the file is gunzipped and opened with + scipy.io.netcdf (only netCDF3 supported). File-like objects are opened + with scipy.io.netcdf (only netCDF3 supported). + group : str, optional + Path to the netCDF4 group in the given file to open (only works for + netCDF4 files). + decode_cf : bool, optional + Whether to decode these variables, assuming they were saved according + to CF conventions. + mask_and_scale : bool, optional + If True, replace array values equal to `_FillValue` with NA and scale + values according to the formula `original_values * scale_factor + + add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are + taken from variable attributes (if they exist). If the `_FillValue` or + `missing_value` attribute contains multiple values a warning will be + issued and all array values matching one of the multiple values will + be replaced by NA. mask_and_scale defaults to True except for the + pseudonetcdf backend. + decode_times : bool, optional + If True, decode times encoded in the standard NetCDF datetime format + into datetime objects. Otherwise, leave them encoded as numbers. + concat_characters : bool, optional + If True, concatenate along the last dimension of character arrays to + form string arrays. Dimensions will only be concatenated over (and + removed) if they have no corresponding variable and if they are only + used as the last dimension of character arrays. + decode_coords : bool, optional + If True, decode the 'coordinates' attribute to identify coordinates in + the resulting dataset. + engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', + 'pseudonetcdf'}, optional + Engine to use when reading files. If not provided, the default engine + is chosen based on available dependencies, with a preference for + 'netcdf4'. + chunks : int or dict, optional + If chunks is provided, it used to load the new dataset into dask + arrays. ``chunks={}`` loads the dataset with dask using a single + chunk for all arrays. + lock : False, True or threading.Lock, optional + If chunks is provided, this argument is passed on to + :py:func:`dask.array.from_array`. 
By default, a global lock is + used when reading data from netCDF files with the netcdf4 and h5netcdf + engines to avoid issues with concurrent access when using dask's + multithreaded backend. + cache : bool, optional + If True, cache data loaded from the underlying datastore in memory as + NumPy arrays when accessed to avoid reading from the underlying data- + store multiple times. Defaults to True unless you specify the `chunks` + argument to use dask, in which case it defaults to False. Does not + change the behavior of coordinates corresponding to dimensions, which + always load their data from disk into a ``pandas.Index``. + drop_variables: string or iterable, optional + A variable or list of variables to exclude from being parsed from the + dataset. This may be useful to drop variables with problems or + inconsistent values. + backend_kwargs: dictionary, optional + A dictionary of keyword arguments to pass on to the backend. This + may be useful when backend options would improve performance or + allow user control of dataset processing. Returns ------- + dataset : Dataset + The newly created dataset. + See Also + -------- + xarray.open_dataset + xarray.load_dataset """ - sep = ":" - attrs = read_silixa_attrs_singlefile(filepathlist[0], sep) - - version_string = attrs["customData:SystemSettings:softwareVersion"] - - # Get major version from string. Tested for Ultima v4, v6, v7 XT-DTS v6 - major_version = int(version_string.replace(" ", "").split(":")[-1][0]) - - return major_version + xr_kws = inspect.signature(xr.open_dataset).parameters.keys() + + ds_kwargs = {k: v for k, v in kwargs.items() if k not in xr_kws} + + if chunks is None: + chunks = {} + + with xr.open_dataset( + filename_or_obj, + group=group, + decode_cf=decode_cf, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + engine=engine, + chunks=chunks, + lock=lock, + cache=cache, + drop_variables=drop_variables, + backend_kwargs=backend_kwargs, + ) as ds_xr: + ds = DataStore( + data_vars=ds_xr.data_vars, + coords=ds_xr.coords, + attrs=ds_xr.attrs, + **ds_kwargs, + ) + # to support deprecated st_labels + ds = ds.rename_labels(assertion=False) -def apsensing_xml_version_check(filepathlist): - """Function which tests which version of xml files are read. + if load_in_memory: + if "cache" in kwargs: + raise TypeError("cache has no effect in this context") + return ds.load() - Parameters - ---------- - filepathlist + else: + return ds - Returns - ------- +def open_mf_datastore( + path=None, paths=None, combine="by_coords", load_in_memory=False, **kwargs +): """ - - sep = ":" - attrs, _ = read_apsensing_attrs_singlefile(filepathlist[0], sep) - - return attrs["wellbore:uid"] - - -def sensornet_ddf_version_check(filepathlist): - """Function which checks and returns the .ddf file version + Open a datastore from multiple netCDF files. This script assumes the + datastore was split along the time dimension. But only variables with a + time dimension should be concatenated in the time dimension. Other + options from xarray do not support this. Parameters ---------- - filepathlist - + combine : {'by_coords', 'nested'}, optional + Leave it at by_coords + path : str + A file path to the stored netcdf files with an asterisk in the + filename to list all. Ensure you have leading zeros in the file + numbering. + paths : list + Define you own list of file paths. Returns ------- - ddf_version - + dataset : Dataset + The newly created dataset. 
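# --- editor's sketch (illustrative, not part of the patch) -------------------
# open_mf_datastore(), as relocated to io.py here, stitches a datastore that
# was saved as multiple netCDF files split along the time dimension; the glob
# pattern is hypothetical.
from dtscalibration import open_mf_datastore

ds = open_mf_datastore(path="output/calibrated_*.nc", combine="by_coords")
# -----------------------------------------------------------------------------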
""" + from xarray.backends.api import open_mfdataset - # Obtain metadata fro mthe first file - _, meta = read_sensornet_single(filepathlist[0]) - - if "Software version number" in meta.keys(): - version_string = meta["Software version number"] - else: - raise ValueError( - "Software version number could not be detected in .ddf file" - + "Either file is corrupted or not supported" - ) - - ddf_version = version_string.replace(",", ".") + if paths is None: + paths = sorted(glob.glob(path)) + assert paths, "No files match found with: " + path - return ddf_version + with open_mfdataset(paths=paths, combine=combine, **kwargs) as xds: + ds = DataStore(data_vars=xds.data_vars, coords=xds.coords, attrs=xds.attrs) + # to support deprecated st_labels + ds = ds.rename_labels(assertion=False) -def sensortran_binary_version_check(filepathlist): - """Function which tests which version the sensortran binaries are. + if load_in_memory: + if "cache" in kwargs: + raise TypeError("cache has no effect in this context") + return ds.load() - Parameters - ---------- - filepathlist - - Returns - ------- - - """ - fname = filepathlist[0] - - with open(fname, "rb") as f: - f.read(2) - version = struct.unpack("= 1 + ), "No measurement files found in provided " "directory: \n" + str(directory) - attrs["forwardMeasurementChannel"] = attrs["customData:forwardMeasurementChannel"] - if double_ended_flag: - attrs["backwardMeasurementChannel"] = attrs[ - "customData:reverseMeasurementChannel" - ] - else: - attrs["backwardMeasurementChannel"] = "N/A" + elif filepathlist is None and zip_handle: + filepathlist = ziphandle_to_filepathlist(fh=zip_handle, extension=file_ext) - # obtain basic data info - data_item_names = ( - attrs["logData:mnemonicList"].replace(" ", "").strip(" ").split(",") + # Make sure that the list of files contains any files + assert len(filepathlist) >= 1, ( + "No measurement files found in provided " "list/directory" ) - nitem = len(data_item_names) - - ntime = len(filepathlist) - - double_ended_flag = bool(int(attrs["isDoubleEnded"])) - chFW = int(attrs["forwardMeasurementChannel"]) - 1 # zero-based - if double_ended_flag: - chBW = int(attrs["backwardMeasurementChannel"]) - 1 # zero-based - else: - # no backward channel is negative value. 
writes better to netcdf - chBW = -1 - # print summary - if not silent: - print(f"{ntime} files were found, each representing a single timestep") - print(f"{nitem} recorded vars were found: " + ", ".join(data_item_names)) - print(f"Recorded at {nx} points along the cable") + xml_version = silixa_xml_version_check(filepathlist) - if double_ended_flag: - print("The measurement is double ended") - else: - print("The measurement is single ended") - - # obtain timeseries from data - timeseries_loc_in_hierarchy = [ - ("log", "customData", "acquisitionTime"), - ("log", "customData", "referenceTemperature"), - ("log", "customData", "probe1Temperature"), - ("log", "customData", "probe2Temperature"), - ("log", "customData", "referenceProbeVoltage"), - ("log", "customData", "probe1Voltage"), - ("log", "customData", "probe2Voltage"), - ( - "log", - "customData", - "UserConfiguration", - "ChannelConfiguration", - "AcquisitionConfiguration", - "AcquisitionTime", - "userAcquisitionTimeFW", - ), - ] - - if double_ended_flag: - timeseries_loc_in_hierarchy.append( - ( - "log", - "customData", - "UserConfiguration", - "ChannelConfiguration", - "AcquisitionConfiguration", - "AcquisitionTime", - "userAcquisitionTimeBW", - ) + if xml_version == 4: + data_vars, coords, attrs = read_silixa_files_routine_v4( + filepathlist, + timezone_netcdf=timezone_netcdf, + silent=silent, + load_in_memory=load_in_memory, ) - timeseries = { - item[-1]: dict(loc=item, array=np.zeros(ntime, dtype=np.float32)) - for item in timeseries_loc_in_hierarchy - } - - # add units to timeseries (unit of measurement) - for key, item in timeseries.items(): - if f"customData:{key}:uom" in attrs: - item["uom"] = attrs[f"customData:{key}:uom"] - else: - item["uom"] = "" - - # Gather data - arr_path = "s:" + "/s:".join(["log", "logData", "data"]) - - @dask.delayed - def grab_data_per_file(file_handle): - """ - - Parameters - ---------- - file_handle - - Returns - ------- - - """ - with open_file(file_handle, mode="r") as f_h: - eltree = ElementTree.parse(f_h) - arr_el = eltree.findall(arr_path, namespaces=ns) - - if not len(arr_el) == nx: - raise ValueError( - "Inconsistent length of x-dimension" - + "\nCheck if files are mixed up, or if the number of " - + "data points vary per file." 
- ) - - # remove the breaks on both sides of the string - # split the string on the comma - arr_str = [arr_eli.text[1:-1].split(",") for arr_eli in arr_el] - - return np.array(arr_str, dtype=float) - - data_lst_dly = [grab_data_per_file(fp) for fp in filepathlist] - data_lst = [ - da.from_delayed(x, shape=(nx, nitem), dtype=float) for x in data_lst_dly - ] - data_arr = da.stack(data_lst).T # .compute() - - # Check whether to compute data_arr (if possible 25% faster) - data_arr_cnk = data_arr.rechunk({0: -1, 1: -1, 2: "auto"}) - if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - elif load_in_memory: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - else: - if not silent: - print("Not reading the data from disk") - data_arr = data_arr_cnk - - data_vars = {} - for name, data_arri in zip(data_item_names, data_arr): - if name == "LAF": - continue - if tld[name] in dim_attrs: - data_vars[tld[name]] = (["x", "time"], data_arri, dim_attrs[tld[name]]) - else: - raise ValueError(f"Dont know what to do with the {name} data column") - - # Obtaining the timeseries data (reference temperature etc) - _ts_dtype = [(k, np.float32) for k in timeseries] - _time_dtype = [ - ("filename_tstamp", np.int64), - ("minDateTimeIndex", "= 1 + ), "No RawDTS measurement files found " "in provided directory: \n" + str(directory) - if double_ended_flag: - print("The measurement is double ended") - else: - print("The measurement is single ended") - - # obtain timeseries from data - timeseries_loc_in_hierarchy = [ - ("wellLog", "customData", "acquisitionTime"), - ("wellLog", "customData", "referenceTemperature"), - ("wellLog", "customData", "probe1Temperature"), - ("wellLog", "customData", "probe2Temperature"), - ("wellLog", "customData", "referenceProbeVoltage"), - ("wellLog", "customData", "probe1Voltage"), - ("wellLog", "customData", "probe2Voltage"), - ( - "wellLog", - "customData", - "UserConfiguration", - "ChannelConfiguration", - "AcquisitionConfiguration", - "AcquisitionTime", - "userAcquisitionTimeFW", - ), - ] + filepathlist_temp = [f.replace("RawDTS", "Temp") for f in filepathlist_dts] - if double_ended_flag: - timeseries_loc_in_hierarchy.append( - ( - "wellLog", - "customData", - "UserConfiguration", - "ChannelConfiguration", - "AcquisitionConfiguration", - "AcquisitionTime", - "userAcquisitionTimeBW", + for ii, fname in enumerate(filepathlist_dts): + # Check if corresponding temperature file exists + if not os.path.isfile(filepathlist_temp[ii]): + raise FileNotFoundError( + "Could not find BinaryTemp " + "file corresponding to {}".format(fname) ) - ) - - timeseries = { - item[-1]: dict(loc=item, array=np.zeros(ntime, dtype=np.float32)) - for item in timeseries_loc_in_hierarchy - } - - # add units to timeseries (unit of measurement) - for key, item in timeseries.items(): - if f"customData:{key}:uom" in attrs: - item["uom"] = attrs[f"customData:{key}:uom"] - else: - item["uom"] = "" - - # Gather data - arr_path = "s:" + "/s:".join(["wellLog", "logData", "data"]) - - @dask.delayed - def grab_data_per_file(file_handle): - """ - - Parameters - ---------- - file_handle - - Returns - ------- - - """ - with open_file(file_handle, mode="r") as f_h: - eltree = ElementTree.parse(f_h) - arr_el = eltree.findall(arr_path, namespaces=ns) - - if not len(arr_el) == nx: - raise ValueError( - "Inconsistent length of x-dimension" - + "\nCheck if files are mixed up, or if the number of 
" - + "data points vary per file." - ) - - # remove the breaks on both sides of the string - # split the string on the comma - arr_str = [arr_eli.text.split(",") for arr_eli in arr_el] - return np.array(arr_str, dtype=float) - - data_lst_dly = [grab_data_per_file(fp) for fp in filepathlist] - data_lst = [ - da.from_delayed(x, shape=(nx, nitem), dtype=float) for x in data_lst_dly - ] - data_arr = da.stack(data_lst).T # .compute() - - # Check whether to compute data_arr (if possible 25% faster) - data_arr_cnk = data_arr.rechunk({0: -1, 1: -1, 2: "auto"}) - if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - elif load_in_memory: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - else: - if not silent: - print("Not reading the data from disk") - data_arr = data_arr_cnk - - data_vars = {} - for name, data_arri in zip(data_item_names, data_arr): - if name == "LAF": - continue - - if tld[name] in dim_attrs: - data_vars[tld[name]] = (["x", "time"], data_arri, dim_attrs[tld[name]]) - else: - raise ValueError("Dont know what to do with the" " {name} data column") - - # Obtaining the timeseries data (reference temperature etc) - _ts_dtype = [(k, np.float32) for k in timeseries] - _time_dtype = [ - ("filename_tstamp", np.int64), - ("minDateTimeIndex", "= 1 + ), "No measurement files found in provided " "directory: \n" + str(directory) - # obtain basic data info - nx = data["x"].size - - ntime = len(filepathlist) + # Make sure that the list of files contains any files + assert len(filepathlist) >= 1, ( + "No measurement files found in provided " "list/directory" + ) - # chFW = int(attrs['forwardMeasurementChannel']) - 1 # zero-based - # if double_ended_flag: - # chBW = int(attrs['backwardMeasurementChannel']) - 1 # zero-based - # else: - # # no backward channel is negative value. writes better to netcdf - # chBW = -1 + device = apsensing_xml_version_check(filepathlist) - # print summary - if not silent: - print("%s files were found," % ntime + " each representing a single timestep") - print("Recorded at %s points along the cable" % nx) + valid_devices = ["CP320"] - if double_ended_flag: - print("The measurement is double ended") - else: - print("The measurement is single ended") - - # Gather data - # x has already been read. should not change over time - xraw = data["x"] - - # Define all variables - referenceTemperature = np.zeros(ntime) - probe1temperature = np.zeros(ntime) - probe2temperature = np.zeros(ntime) - gamma_ddf = np.zeros(ntime) - k_internal = np.zeros(ntime) - k_external = np.zeros(ntime) - acquisitiontimeFW = np.zeros(ntime) - acquisitiontimeBW = np.zeros(ntime) - - timestamp = [""] * ntime - ST = np.zeros((nx, ntime)) - AST = np.zeros((nx, ntime)) - TMP = np.zeros((nx, ntime)) - - if double_ended_flag: - REV_ST = np.zeros((nx, ntime)) - REV_AST = np.zeros((nx, ntime)) - - for ii in range(ntime): - data, meta = read_sensornet_single(filepathlist[ii]) - - timestamp[ii] = pd.DatetimeIndex([meta["date"] + " " + meta["time"]])[0] - probe1temperature[ii] = float(meta["T ext. ref 1 (°C)"]) - probe2temperature[ii] = float(meta["T ext. 
ref 2 (°C)"]) - referenceTemperature[ii] = float(meta["T internal ref (°C)"]) - gamma_ddf[ii] = float(meta["gamma"]) - k_internal[ii] = float(meta["k internal"]) - k_external[ii] = float(meta["k external"]) - acquisitiontimeFW[ii] = float(meta["forward acquisition time"]) - acquisitiontimeBW[ii] = float(meta["reverse acquisition time"]) - - ST[:, ii] = data["st"] - AST[:, ii] = data["ast"] - TMP[:, ii] = data["tmp"] - - if double_ended_flag: - REV_ST[:, ii] = data["rst"] - REV_AST[:, ii] = data["rast"] - - if fiber_length is None and double_ended_flag: - fiber_length = np.max([0.0, xraw[-1] - add_internal_fiber_length]) - elif fiber_length is None and not double_ended_flag: - fiber_length = xraw[-1] - else: + if device in valid_devices: pass - assert fiber_length > 0.0, ( - "`fiber_length` is not defined. Use key" - "word argument in read function." + str(fiber_length) - ) - - fiber_start_index = (np.abs(xraw + add_internal_fiber_length)).argmin() - fiber_0_index = np.abs(xraw).argmin() - fiber_1_index = (np.abs(xraw - fiber_length)).argmin() - fiber_n_indices = fiber_1_index - fiber_0_index - fiber_n_indices_internal = fiber_0_index - fiber_start_index - if double_ended_flag: - fiber_end_index = np.min([xraw.size, fiber_1_index + fiber_n_indices_internal]) else: - fiber_end_index = fiber_1_index - - if double_ended_flag: - if not flip_reverse_measurements: - # fiber length how the backward channel is aligned - fiber_length_raw = float(meta["fibre end"]) - fiber_bw_1_index = np.abs(xraw - fiber_length_raw).argmin() - fiber_bw_end_index = np.min( - [xraw.size, fiber_bw_1_index + (fiber_end_index - fiber_1_index)] - ) - fiber_bw_start_index = np.max( - [0, fiber_bw_1_index - fiber_n_indices - fiber_n_indices_internal] - ) - - REV_ST = REV_ST[fiber_bw_start_index:fiber_bw_end_index] - REV_AST = REV_AST[fiber_bw_start_index:fiber_bw_end_index] - - else: - # Use the fiber indices from the forward channel - n_indices_internal_left = fiber_0_index - fiber_start_index - n_indices_internal_right = np.max([0, fiber_end_index - fiber_1_index]) - n_indices_internal_shortest = np.min( - [n_indices_internal_left, n_indices_internal_right] - ) - fiber_start_index = fiber_0_index - n_indices_internal_shortest - fiber_end_index = ( - fiber_0_index + fiber_n_indices + n_indices_internal_shortest - ) - REV_ST = REV_ST[fiber_end_index:fiber_start_index:-1] - REV_AST = REV_AST[fiber_end_index:fiber_start_index:-1] - - x = xraw[fiber_start_index:fiber_end_index] - TMP = TMP[fiber_start_index:fiber_end_index] - ST = ST[fiber_start_index:fiber_end_index] - AST = AST[fiber_start_index:fiber_end_index] - - data_vars = { - "st": (["x", "time"], ST, dim_attrs["st"]), - "ast": (["x", "time"], AST, dim_attrs["ast"]), - "tmp": (["x", "time"], TMP, dim_attrs["tmp"]), - "probe1Temperature": ( - "time", - probe1temperature, - { - "name": "Probe 1 temperature", - "description": "reference probe 1 " "temperature", - "units": r"$^\circ$C", - }, - ), - "probe2Temperature": ( - "time", - probe2temperature, - { - "name": "Probe 2 temperature", - "description": "reference probe 2 " "temperature", - "units": r"$^\circ$C", - }, - ), - "referenceTemperature": ( - "time", - referenceTemperature, - { - "name": "reference temperature", - "description": "Internal reference " "temperature", - "units": r"$^\circ$C", - }, - ), - "gamma_ddf": ( - "time", - gamma_ddf, - { - "name": "gamma ddf", - "description": "machine " "calibrated gamma", - "units": "-", - }, - ), - "k_internal": ( - "time", - k_internal, - { - "name": "k internal", - 
"description": "machine calibrated " "internal k", - "units": "-", - }, - ), - "k_external": ( - "time", - k_external, - { - "name": "reference temperature", - "description": "machine calibrated " "external k", - "units": "-", - }, - ), - "userAcquisitionTimeFW": ( - "time", - acquisitiontimeFW, - dim_attrs["userAcquisitionTimeFW"], - ), - "userAcquisitionTimeBW": ( - "time", - acquisitiontimeBW, - dim_attrs["userAcquisitionTimeBW"], - ), - } - - if double_ended_flag: - data_vars["rst"] = (["x", "time"], REV_ST, dim_attrs["rst"]) - data_vars["rast"] = (["x", "time"], REV_AST, dim_attrs["rast"]) - - filenamelist = [os.path.split(f)[-1] for f in filepathlist] - - coords = {"x": ("x", x, dim_attrs["x"]), "filename": ("time", filenamelist)} - - dtFW = data_vars["userAcquisitionTimeFW"][1].astype("timedelta64[s]") - dtBW = data_vars["userAcquisitionTimeBW"][1].astype("timedelta64[s]") - if not double_ended_flag: - tcoords = coords_time( - np.array(timestamp).astype("datetime64[ns]"), - timezone_netcdf=timezone_netcdf, - timezone_input_files=timezone_input_files, - dtFW=dtFW, - double_ended_flag=double_ended_flag, - ) - else: - tcoords = coords_time( - np.array(timestamp).astype("datetime64[ns]"), - timezone_netcdf=timezone_netcdf, - timezone_input_files=timezone_input_files, - dtFW=dtFW, - dtBW=dtBW, - double_ended_flag=double_ended_flag, + warnings.warn( + "AP sensing device " + '"{0}"'.format(device) + + " has not been tested.\nPlease open an issue on github" + + " and provide an example file" ) - coords.update(tcoords) - - return data_vars, coords, attrs - - -def read_silixa_attrs_singlefile(filename, sep): - """ - - Parameters - ---------- - filename - sep - - Returns - ------- - - """ - import xmltodict - - def metakey(meta, dict_to_parse, prefix): - """ - Fills the metadata dictionairy with data from dict_to_parse. - The dict_to_parse is the raw data from a silixa xml-file. - dict_to_parse is a nested dictionary to represent the - different levels of hierarchy. For example, - toplevel = {lowlevel: {key: value}}. - This function returns {'toplevel:lowlevel:key': value}. - Where prefix is the flattened hierarchy. - - Parameters - ---------- - meta : dict - the output dictionairy with prcessed metadata - dict_to_parse : dict - prefix - - Returns - ------- - - """ - - for key in dict_to_parse: - if prefix == "": - prefix_parse = key.replace("@", "") - else: - prefix_parse = sep.join([prefix, key.replace("@", "")]) - - if prefix_parse == sep.join(("logData", "data")): - # skip the LAF , ST data - continue - - if hasattr(dict_to_parse[key], "keys"): - # Nested dictionaries, flatten hierarchy. - meta.update(metakey(meta, dict_to_parse[key], prefix_parse)) - - elif isinstance(dict_to_parse[key], list): - # if the key has values for the multiple channels - for ival, val in enumerate(dict_to_parse[key]): - num_key = prefix_parse + "_" + str(ival) - meta.update(metakey(meta, val, num_key)) - else: - meta[prefix_parse] = dict_to_parse[key] - - return meta - - with open_file(filename) as fh: - doc_ = xmltodict.parse(fh.read()) - - if "wellLogs" in doc_.keys(): - doc = doc_["wellLogs"]["wellLog"] - else: - doc = doc_["logs"]["log"] - - return metakey({}, doc, "") - - -def read_sensortran_files_routine( - filepathlist_dts, - filepathlist_temp, - timezone_input_files="UTC", - timezone_netcdf="UTC", - silent=False, -): - """ - Internal routine that reads sensortran files. - Use dtscalibration.read_sensortran_files function instead. 
- - The sensortran files are in UTC time - - Parameters - ---------- - filepathlist_dts - filepathlist_temp - timezone_netcdf - silent - - Returns - ------- - - """ - assert timezone_input_files == "UTC", "The sensortran files are always in UTC time." - - # Obtain metadata from the first file - data_dts, meta_dts = read_sensortran_single(filepathlist_dts[0]) - data_temp, meta_temp = read_sensortran_single(filepathlist_temp[0]) - - attrs = meta_dts - - # Add standardised required attributes - attrs["isDoubleEnded"] = "0" - - attrs["forwardMeasurementChannel"] = meta_dts["channel_id"] - 1 - attrs["backwardMeasurementChannel"] = "N/A" - - # obtain basic data info - nx = meta_temp["num_points"] - - ntime = len(filepathlist_dts) - - # print summary - if not silent: - print("%s files were found," % ntime + " each representing a single timestep") - print("Recorded at %s points along the cable" % nx) - - print("The measurement is single ended") - - # Gather data - # x has already been read. should not change over time - x = data_temp["x"] - - # Define all variables - referenceTemperature = np.zeros(ntime) - acquisitiontimeFW = np.ones(ntime) - - timestamp = [""] * ntime - ST = np.zeros((nx, ntime), dtype=np.int32) - AST = np.zeros((nx, ntime), dtype=np.int32) - TMP = np.zeros((nx, ntime)) - - ST_zero = np.zeros((ntime)) - AST_zero = np.zeros((ntime)) - - for ii in range(ntime): - data_dts, meta_dts = read_sensortran_single(filepathlist_dts[ii]) - data_temp, meta_temp = read_sensortran_single(filepathlist_temp[ii]) - - timestamp[ii] = data_dts["time"] - - referenceTemperature[ii] = data_temp["reference_temperature"] - 273.15 - - ST[:, ii] = data_dts["st"][:nx] - AST[:, ii] = data_dts["ast"][:nx] - # The TMP can vary by 1 or 2 datapoints, dynamically assign the values - TMP[: meta_temp["num_points"], ii] = data_temp["tmp"][:nx] - - zero_index = (meta_dts["num_points"] - nx) // 2 - ST_zero[ii] = np.mean(data_dts["st"][nx + zero_index :]) - AST_zero[ii] = np.mean(data_dts["ast"][nx + zero_index :]) - - data_vars = { - "st": (["x", "time"], ST, dim_attrs["st"]), - "ast": (["x", "time"], AST, dim_attrs["ast"]), - "tmp": ( - ["x", "time"], - TMP, - { - "name": "tmp", - "description": "Temperature calibrated by device", - "units": meta_temp["y_units"], - }, - ), - "referenceTemperature": ( - "time", - referenceTemperature, - { - "name": "reference temperature", - "description": "Internal reference " "temperature", - "units": r"$^\circ$C", - }, - ), - "st_zero": ( - ["time"], - ST_zero, - { - "name": "ST_zero", - "description": "Stokes zero count", - "units": meta_dts["y_units"], - }, - ), - "ast_zero": ( - ["time"], - AST_zero, - { - "name": "AST_zero", - "description": "anit-Stokes zero count", - "units": meta_dts["y_units"], - }, - ), - "userAcquisitionTimeFW": ( - "time", - acquisitiontimeFW, - dim_attrs["userAcquisitionTimeFW"], - ), - } - - filenamelist_dts = [os.path.split(f)[-1] for f in filepathlist_dts] - filenamelist_temp = [os.path.split(f)[-1] for f in filepathlist_temp] - - coords = { - "x": ( - "x", - x, - { - "name": "distance", - "description": "Length along fiber", - "long_description": "Starting at connector " + "of forward channel", - "units": "m", - }, - ), - "filename": ("time", filenamelist_dts), - "filename_temp": ("time", filenamelist_temp), - } - - dtFW = data_vars["userAcquisitionTimeFW"][1].astype("timedelta64[s]") - - tcoords = coords_time( - np.array(timestamp).astype("datetime64[ns]"), + data_vars, coords, attrs = read_apsensing_files_routine( + filepathlist, 
timezone_netcdf=timezone_netcdf, - timezone_input_files="UTC", - dtFW=dtFW, - double_ended_flag=False, + timezone_input_files=timezone_input_files, + silent=silent, + load_in_memory=load_in_memory, ) - coords.update(tcoords) - - return data_vars, coords, attrs + ds = DataStore(data_vars=data_vars, coords=coords, attrs=attrs, **kwargs) + return ds -def read_sensortran_single(fname): - """ - Internal routine that reads a single sensortran file. - Use dtscalibration.read_sensortran_files function instead. - - Parameters - ---------- - fname - - Returns - ------- - data, metadata - """ - import struct - from datetime import datetime - - meta = {} - data = {} - with open(fname, "rb") as f: - meta["survey_type"] = struct.unpack("= 1, msg + + # sort based on dates in filesname. A simple sorted() is not sufficient + # as month folders do not sort well + basenames = [os.path.basename(fp) for fp in filepathlist_unsorted] + dates = ["".join(bn.split(" ")[2:4]) for bn in basenames] + i_sort = np.argsort(dates) + filepathlist = [filepathlist_unsorted[i] for i in i_sort] + + # Check measurements are all from same channel + chno = [bn.split(" ")[1] for bn in basenames] + assert ( + len(set(chno)) == 1 + ), "Folder contains measurements from multiple channels" + + # Make sure that the list of files contains any files + assert len(filepathlist) >= 1, ( + "No measurement files found in provided " "list/directory" ) - @dask.delayed - def grab_data_per_file(file_handle): - """ + ddf_version = sensornet_ddf_version_check(filepathlist) - Parameters - ---------- - file_handle - - Returns - ------- - - """ - - with open_file(file_handle, mode="r") as f_h: - if skip_chars: - f_h.read(3) - eltree = ElementTree.parse(f_h) - arr_el = eltree.findall(arr_path, namespaces=ns) - - if not len(arr_el) == nx: - raise ValueError( - "Inconsistent length of x-dimension" - + "\nCheck if files are mixed up, or if the number of " - + "data points vary per file." 
- ) - - # remove the breaks on both sides of the string - # split the string on the comma - arr_str = [arr_eli.text.split(",") for arr_eli in arr_el] - return np.array(arr_str, dtype=float) - - data_lst_dly = [grab_data_per_file(fp) for fp in filepathlist] - - data_lst = [ - da.from_delayed(x, shape=(nx, nitem), dtype=float) for x in data_lst_dly + valid_versions = [ + "Halo DTS v1*", + "ORYX F/W v1.02 Oryx Data Collector v3*", + "ORYX F/W v4.00 Oryx Data Collector v3*", + "Sentinel DTS v5*", ] - data_arr = da.stack(data_lst).T # .compute() - - # Check whether to compute data_arr (if possible 25% faster) - data_arr_cnk = data_arr.rechunk({0: -1, 1: -1, 2: "auto"}) - if load_in_memory == "auto" and data_arr_cnk.npartitions <= 5: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - elif load_in_memory: - if not silent: - print("Reading the data from disk") - data_arr = data_arr_cnk.compute() - else: - if not silent: - print("Not reading the data from disk") - data_arr = data_arr_cnk - - data_vars = {} - for name, data_arri in zip(data_item_names, data_arr): - if name == "LAF": - continue - - if tld[name] in dim_attrs_apsensing: - data_vars[tld[name]] = ( - ["x", "time"], - data_arri, - dim_attrs_apsensing[tld[name]], - ) - elif name in dim_attrs_apsensing: - data_vars[tld[name]] = (["x", "time"], data_arri, dim_attrs_apsensing[name]) - else: - raise ValueError( - "Dont know what to do with the" + " {} data column".format(name) - ) - - _time_dtype = [("filename_tstamp", np.int64), ("acquisitionTime", " 0.0, ( + "`fiber_length` is not defined. Use key" + "word argument in read function." + str(fiber_length) + ) + + fiber_start_index = (np.abs(xraw + add_internal_fiber_length)).argmin() + fiber_0_index = np.abs(xraw).argmin() + fiber_1_index = (np.abs(xraw - fiber_length)).argmin() + fiber_n_indices = fiber_1_index - fiber_0_index + fiber_n_indices_internal = fiber_0_index - fiber_start_index + if double_ended_flag: + fiber_end_index = np.min([xraw.size, fiber_1_index + fiber_n_indices_internal]) + else: + fiber_end_index = fiber_1_index + + if double_ended_flag: + if not flip_reverse_measurements: + # fiber length how the backward channel is aligned + fiber_length_raw = float(meta["fibre end"]) + fiber_bw_1_index = np.abs(xraw - fiber_length_raw).argmin() + fiber_bw_end_index = np.min( + [xraw.size, fiber_bw_1_index + (fiber_end_index - fiber_1_index)] + ) + fiber_bw_start_index = np.max( + [0, fiber_bw_1_index - fiber_n_indices - fiber_n_indices_internal] + ) + + REV_ST = REV_ST[fiber_bw_start_index:fiber_bw_end_index] + REV_AST = REV_AST[fiber_bw_start_index:fiber_bw_end_index] + + else: + # Use the fiber indices from the forward channel + n_indices_internal_left = fiber_0_index - fiber_start_index + n_indices_internal_right = np.max([0, fiber_end_index - fiber_1_index]) + n_indices_internal_shortest = np.min( + [n_indices_internal_left, n_indices_internal_right] + ) + fiber_start_index = fiber_0_index - n_indices_internal_shortest + fiber_end_index = ( + fiber_0_index + fiber_n_indices + n_indices_internal_shortest + ) + REV_ST = REV_ST[fiber_end_index:fiber_start_index:-1] + REV_AST = REV_AST[fiber_end_index:fiber_start_index:-1] + + x = xraw[fiber_start_index:fiber_end_index] + TMP = TMP[fiber_start_index:fiber_end_index] + ST = ST[fiber_start_index:fiber_end_index] + AST = AST[fiber_start_index:fiber_end_index] + + data_vars = { + "st": (["x", "time"], ST, dim_attrs["st"]), + "ast": (["x", "time"], AST, dim_attrs["ast"]), + "tmp": (["x", "time"], 
TMP, dim_attrs["tmp"]), + "probe1Temperature": ( + "time", + probe1temperature, + { + "name": "Probe 1 temperature", + "description": "reference probe 1 " "temperature", + "units": r"$^\circ$C", + }, + ), + "probe2Temperature": ( + "time", + probe2temperature, + { + "name": "Probe 2 temperature", + "description": "reference probe 2 " "temperature", + "units": r"$^\circ$C", + }, + ), + "referenceTemperature": ( + "time", + referenceTemperature, + { + "name": "reference temperature", + "description": "Internal reference " "temperature", + "units": r"$^\circ$C", + }, + ), + "gamma_ddf": ( + "time", + gamma_ddf, + { + "name": "gamma ddf", + "description": "machine " "calibrated gamma", + "units": "-", + }, + ), + "k_internal": ( + "time", + k_internal, + { + "name": "k internal", + "description": "machine calibrated " "internal k", + "units": "-", + }, + ), + "k_external": ( + "time", + k_external, + { + "name": "reference temperature", + "description": "machine calibrated " "external k", + "units": "-", + }, + ), + "userAcquisitionTimeFW": ( + "time", + acquisitiontimeFW, + dim_attrs["userAcquisitionTimeFW"], + ), + "userAcquisitionTimeBW": ( + "time", + acquisitiontimeBW, + dim_attrs["userAcquisitionTimeBW"], + ), + } + + if double_ended_flag: + data_vars["rst"] = (["x", "time"], REV_ST, dim_attrs["rst"]) + data_vars["rast"] = (["x", "time"], REV_AST, dim_attrs["rast"]) + + filenamelist = [os.path.split(f)[-1] for f in filepathlist] + + coords = {"x": ("x", x, dim_attrs["x"]), "filename": ("time", filenamelist)} + + dtFW = data_vars["userAcquisitionTimeFW"][1].astype("timedelta64[s]") + dtBW = data_vars["userAcquisitionTimeBW"][1].astype("timedelta64[s]") + if not double_ended_flag: + tcoords = coords_time( + np.array(timestamp).astype("datetime64[ns]"), + timezone_netcdf=timezone_netcdf, + timezone_input_files=timezone_input_files, + dtFW=dtFW, + double_ended_flag=double_ended_flag, + ) + else: + tcoords = coords_time( + np.array(timestamp).astype("datetime64[ns]"), + timezone_netcdf=timezone_netcdf, + timezone_input_files=timezone_input_files, + dtFW=dtFW, + dtBW=dtBW, + double_ended_flag=double_ended_flag, + ) + + coords.update(tcoords) + + return data_vars, coords, attrs + + +def read_silixa_attrs_singlefile(filename, sep): + """ + + Parameters + ---------- + filename + sep + + Returns + ------- + + """ + import xmltodict + + def metakey(meta, dict_to_parse, prefix): + """ + Fills the metadata dictionairy with data from dict_to_parse. + The dict_to_parse is the raw data from a silixa xml-file. + dict_to_parse is a nested dictionary to represent the + different levels of hierarchy. For example, + toplevel = {lowlevel: {key: value}}. + This function returns {'toplevel:lowlevel:key': value}. + Where prefix is the flattened hierarchy. + + Parameters + ---------- + meta : dict + the output dictionairy with prcessed metadata + dict_to_parse : dict + prefix + + Returns + ------- + + """ + + for key in dict_to_parse: + if prefix == "": + prefix_parse = key.replace("@", "") + else: + prefix_parse = sep.join([prefix, key.replace("@", "")]) + + if prefix_parse == sep.join(("logData", "data")): + # skip the LAF , ST data + continue + + if hasattr(dict_to_parse[key], "keys"): + # Nested dictionaries, flatten hierarchy. 
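# A minimal, standalone sketch of the flattening that metakey performs on the
# parsed XML attributes (not the metakey implementation itself). It assumes
# ":" as `sep`, a toy attribute dict, and ignores the list and logData:data
# branches handled below; `_flatten_example` is a hypothetical helper name.
def _flatten_example(dict_to_parse, prefix="", sep=":"):
    flat = {}
    for key, val in dict_to_parse.items():
        name = key.replace("@", "")
        full = name if prefix == "" else sep.join([prefix, name])
        if hasattr(val, "keys"):
            # Nested dictionary: recurse and flatten the hierarchy.
            flat.update(_flatten_example(val, full, sep))
        else:
            flat[full] = val
    return flat

assert _flatten_example({"customData": {"@uid": "x123", "probe": {"temp": 20.5}}}) == {
    "customData:uid": "x123",
    "customData:probe:temp": 20.5,
}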
+ meta.update(metakey(meta, dict_to_parse[key], prefix_parse)) + + elif isinstance(dict_to_parse[key], list): + # if the key has values for the multiple channels + for ival, val in enumerate(dict_to_parse[key]): + num_key = prefix_parse + "_" + str(ival) + meta.update(metakey(meta, val, num_key)) + else: + meta[prefix_parse] = dict_to_parse[key] + + return meta + + with open_file(filename) as fh: + doc_ = xmltodict.parse(fh.read()) + + if "wellLogs" in doc_.keys(): + doc = doc_["wellLogs"]["wellLog"] + else: + doc = doc_["logs"]["log"] + + return metakey({}, doc, "") + + +def read_sensortran_files_routine( + filepathlist_dts, + filepathlist_temp, + timezone_input_files="UTC", + timezone_netcdf="UTC", + silent=False, +): + """ + Internal routine that reads sensortran files. + Use dtscalibration.read_sensortran_files function instead. + + The sensortran files are in UTC time + + Parameters + ---------- + filepathlist_dts + filepathlist_temp + timezone_netcdf + silent + + Returns + ------- + + """ + assert timezone_input_files == "UTC", "The sensortran files are always in UTC time." + + # Obtain metadata from the first file + data_dts, meta_dts = read_sensortran_single(filepathlist_dts[0]) + data_temp, meta_temp = read_sensortran_single(filepathlist_temp[0]) + + attrs = meta_dts + + # Add standardised required attributes + attrs["isDoubleEnded"] = "0" + + attrs["forwardMeasurementChannel"] = meta_dts["channel_id"] - 1 + attrs["backwardMeasurementChannel"] = "N/A" + + # obtain basic data info + nx = meta_temp["num_points"] + + ntime = len(filepathlist_dts) + + # print summary + if not silent: + print("%s files were found," % ntime + " each representing a single timestep") + print("Recorded at %s points along the cable" % nx) + + print("The measurement is single ended") + + # Gather data + # x has already been read. 
should not change over time + x = data_temp["x"] + + # Define all variables + referenceTemperature = np.zeros(ntime) + acquisitiontimeFW = np.ones(ntime) + + timestamp = [""] * ntime + ST = np.zeros((nx, ntime), dtype=np.int32) + AST = np.zeros((nx, ntime), dtype=np.int32) + TMP = np.zeros((nx, ntime)) + + ST_zero = np.zeros((ntime)) + AST_zero = np.zeros((ntime)) + + for ii in range(ntime): + data_dts, meta_dts = read_sensortran_single(filepathlist_dts[ii]) + data_temp, meta_temp = read_sensortran_single(filepathlist_temp[ii]) + + timestamp[ii] = data_dts["time"] + + referenceTemperature[ii] = data_temp["reference_temperature"] - 273.15 + + ST[:, ii] = data_dts["st"][:nx] + AST[:, ii] = data_dts["ast"][:nx] + # The TMP can vary by 1 or 2 datapoints, dynamically assign the values + TMP[: meta_temp["num_points"], ii] = data_temp["tmp"][:nx] + + zero_index = (meta_dts["num_points"] - nx) // 2 + ST_zero[ii] = np.mean(data_dts["st"][nx + zero_index :]) + AST_zero[ii] = np.mean(data_dts["ast"][nx + zero_index :]) + + data_vars = { + "st": (["x", "time"], ST, dim_attrs["st"]), + "ast": (["x", "time"], AST, dim_attrs["ast"]), + "tmp": ( + ["x", "time"], + TMP, + { + "name": "tmp", + "description": "Temperature calibrated by device", + "units": meta_temp["y_units"], + }, + ), + "referenceTemperature": ( + "time", + referenceTemperature, + { + "name": "reference temperature", + "description": "Internal reference " "temperature", + "units": r"$^\circ$C", + }, + ), + "st_zero": ( + ["time"], + ST_zero, + { + "name": "ST_zero", + "description": "Stokes zero count", + "units": meta_dts["y_units"], + }, + ), + "ast_zero": ( + ["time"], + AST_zero, + { + "name": "AST_zero", + "description": "anit-Stokes zero count", + "units": meta_dts["y_units"], + }, + ), + "userAcquisitionTimeFW": ( + "time", + acquisitiontimeFW, + dim_attrs["userAcquisitionTimeFW"], + ), + } + + filenamelist_dts = [os.path.split(f)[-1] for f in filepathlist_dts] + filenamelist_temp = [os.path.split(f)[-1] for f in filepathlist_temp] + + coords = { + "x": ( + "x", + x, + { + "name": "distance", + "description": "Length along fiber", + "long_description": "Starting at connector " + "of forward channel", + "units": "m", + }, + ), + "filename": ("time", filenamelist_dts), + "filename_temp": ("time", filenamelist_temp), + } + + dtFW = data_vars["userAcquisitionTimeFW"][1].astype("timedelta64[s]") + + tcoords = coords_time( + np.array(timestamp).astype("datetime64[ns]"), + timezone_netcdf=timezone_netcdf, + timezone_input_files="UTC", + dtFW=dtFW, + double_ended_flag=False, + ) + + coords.update(tcoords) + + return data_vars, coords, attrs + + +def read_sensortran_single(fname): + """ + Internal routine that reads a single sensortran file. + Use dtscalibration.read_sensortran_files function instead. 
+ + Parameters + ---------- + fname + + Returns + ------- + data, metadata + """ + import struct + from datetime import datetime + + meta = {} + data = {} + with open(fname, "rb") as f: + meta["survey_type"] = struct.unpack(" Date: Fri, 4 Aug 2023 21:04:29 +0200 Subject: [PATCH 17/26] Pull code through linter --- src/dtscalibration/datastore.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index a1e335ba..3357bc36 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -1,6 +1,3 @@ -import fnmatch -import glob -import inspect import os import warnings from typing import Dict @@ -25,16 +22,6 @@ from dtscalibration.calibrate_utils import wls_stats from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.io_utils import _dim_attrs -from dtscalibration.io_utils import apsensing_xml_version_check -from dtscalibration.io_utils import read_apsensing_files_routine -from dtscalibration.io_utils import read_sensornet_files_routine_v3 -from dtscalibration.io_utils import read_sensortran_files_routine -from dtscalibration.io_utils import read_silixa_files_routine_v4 -from dtscalibration.io_utils import read_silixa_files_routine_v6 -from dtscalibration.io_utils import sensornet_ddf_version_check -from dtscalibration.io_utils import sensortran_binary_version_check -from dtscalibration.io_utils import silixa_xml_version_check -from dtscalibration.io_utils import ziphandle_to_filepathlist dtsattr_namelist = ["double_ended_flag"] dim_attrs = {k: v for kl, v in _dim_attrs.items() for k in kl} @@ -5768,5 +5755,3 @@ def get_taf_values(self, pval, x, trans_att, axis=""): arr_out[x >= taxi] += np.diag(tai)[None] return arr_out - - From 10f36c33e21a866f0416d835890bcdd94aee0f93 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 6 Aug 2023 17:16:42 +0200 Subject: [PATCH 18/26] Refactored ufunc_per_section and moved ParameterIndex_Ended to datastore_utils --- src/dtscalibration/datastore.py | 490 ++------------------- src/dtscalibration/datastore_utils.py | 601 ++++++++++++++++++++++++++ 2 files changed, 629 insertions(+), 462 deletions(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 3357bc36..320e0998 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -21,6 +21,9 @@ from dtscalibration.calibrate_utils import wls_sparse from dtscalibration.calibrate_utils import wls_stats from dtscalibration.datastore_utils import check_timestep_allclose +from dtscalibration.datastore_utils import ParameterIndexDoubleEnded +from dtscalibration.datastore_utils import ParameterIndexSingleEnded +from dtscalibration.datastore_utils import ufunc_per_section_helper from dtscalibration.io_utils import _dim_attrs dtsattr_namelist = ["double_ended_flag"] @@ -970,6 +973,8 @@ def variance_stokes_constant(self, st_label, sections=None, reshape_residuals=Tr 04Calculate_variance_Stokes.ipynb>`_ """ + # var_I, resid = variance_stokes_constant_util(st_label, sections=None, reshape_residuals=True) + # def variance_stokes_constant_util(st_label, sections=None, reshape_residuals=True): def func_fit(p, xs): return p[:xs, None] * p[None, xs:] @@ -1900,7 +1905,7 @@ def calibration_single_ended( self.check_deprecated_kwargs(kwargs) self.set_trans_att(trans_att=trans_att, **kwargs) - if sections: + if sections is not None: self.sections = sections if method == "wls": @@ -2454,7 +2459,7 @@ def calibration_double_ended( 
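# A minimal sketch of the behavioural difference behind changing the guard
# from `if sections:` to `if sections is not None:`. The section values below
# are illustrative; only the truthiness logic is the point.
for sections in (None, {}, {"probe1Temperature": [slice(7.5, 17.0)]}):
    truthy = bool(sections)          # False for both None and an empty dict
    not_none = sections is not None  # False only for None
    print(sections, truthy, not_none)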
self.set_trans_att(trans_att=trans_att, **kwargs) - if sections: + if sections is not None: self.sections = sections self.check_reference_section_values() @@ -5288,470 +5293,31 @@ def ufunc_per_section( if sections is None: sections = self.sections - if not func: - - def func(a): - """ - - Parameters - ---------- - a - - Returns - ------- - - """ - return a - - elif isinstance(func, str) and func == "var": - - def func(a): - """ - - Parameters - ---------- - a - - Returns - ------- - - """ - return np.var(a, ddof=1) - + if label is None: + dataarray = None else: - assert callable(func) + dataarray = self[label] - assert calc_per in ["all", "section", "stretch"] - - if not x_indices and ( - (label and hasattr(self[label].data, "chunks")) - or ( - subtract_from_label - and hasattr(self[subtract_from_label].data, "chunks") - ) - ): - concat = da.concatenate + if x_indices: + x_coords = self.x + reference_dataset = None else: - concat = np.concatenate - - out = dict() - - for k, section in sections.items(): - out[k] = [] - for stretch in section: - if x_indices: - assert not subtract_from_label - assert not temp_err - assert not ref_temp_broadcasted - # so it is slicable with x-indices - self["_x_indices"] = self.x.astype(int) * 0 + np.arange(self.x.size) - arg1 = self["_x_indices"].sel(x=stretch).data - del self["_x_indices"] - - else: - arg1 = self[label].sel(x=stretch).data - - if subtract_from_label: - # calculate std wrt other series - # check_dims(self, [subtract_from_label], - # correct_dims=('x', 'time')) - - assert not temp_err - - arg2 = self[subtract_from_label].sel(x=stretch).data - out[k].append(arg1 - arg2) - - elif temp_err: - # calculate std wrt reference temperature of the - # corresponding bath - arg2 = self[k].data - out[k].append(arg1 - arg2) - - elif ref_temp_broadcasted: - assert not temp_err - assert not subtract_from_label - - arg2 = da.broadcast_to(self[k].data, arg1.shape) - out[k].append(arg2) - - else: - # calculate std wrt mean value - out[k].append(arg1) - - if calc_per == "stretch": - out[k] = [func(argi, **func_kwargs) for argi in out[k]] - - elif calc_per == "section": - # flatten the out_dict to sort them - start = [i.start for i in section] - i_sorted = np.argsort(start) - out_flat_sort = [out[k][i] for i in i_sorted] - out[k] = func(concat(out_flat_sort), **func_kwargs) - - if calc_per == "all": - # flatten the out_dict to sort them - start = [item.start for sublist in sections.values() for item in sublist] - i_sorted = np.argsort(start) - out_flat = [item for sublist in out.values() for item in sublist] - out_flat_sort = [out_flat[i] for i in i_sorted] - out = func(concat(out_flat_sort, axis=0), **func_kwargs) - - if ( - hasattr(out, "chunks") - and len(out.chunks) > 0 - and "x" in self[label].dims - ): - # also sum the chunksize in the x dimension - # first find out where the x dim is - ixdim = self[label].dims.index("x") - c_old = out.chunks - c_new = list(c_old) - c_new[ixdim] = sum(c_old[ixdim]) - out = out.rechunk(c_new) - + x_coords = None + reference_dataset = {k: self[k] for k in sections} + + out = ufunc_per_section_helper( + x_coords=x_coords, + sections=sections, + func=func, + dataarray=dataarray, + subtract_from_dataarray=subtract_from_label, + reference_dataset=reference_dataset, + subtract_reference_from_dataarray=temp_err, + ref_temp_broadcasted=ref_temp_broadcasted, + calc_per=calc_per, + **func_kwargs, + ) return out def resample_datastore(*args, **kwargs): raise "ds.resample_datastore() is deprecated. 
Use from dtscalibration import DataStore; " "DataStore(ds.resample()) instead. See example notebook 2." - - -class ParameterIndexDoubleEnded: - """ - npar = 1 + 2 * nt + nx + 2 * nt * nta - assert pval.size == npar - assert p_var.size == npar - if calc_cov: - assert p_cov.shape == (npar, npar) - gamma = pval[0] - d_fw = pval[1:nt + 1] - d_bw = pval[1 + nt:1 + 2 * nt] - alpha = pval[1 + 2 * nt:1 + 2 * nt + nx] - # store calibration parameters in DataStore - self["gamma"] = (tuple(), gamma) - self["alpha"] = (('x',), alpha) - self["df"] = (('time',), d_fw) - self["db"] = (('time',), d_bw) - if nta > 0: - ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) - self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) - """ - - def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): - self.nt = nt - self.nx = nx - self.nta = nta - self.fix_gamma = fix_gamma - self.fix_alpha = fix_alpha - - @property - def all(self): - return np.concatenate( - (self.gamma, self.df, self.db, self.alpha, self.ta.flatten(order="F")) - ) - - @property - def npar(self): - if not self.fix_gamma and not self.fix_alpha: - return 1 + 2 * self.nt + self.nx + 2 * self.nt * self.nta - elif self.fix_gamma and not self.fix_alpha: - return 2 * self.nt + self.nx + 2 * self.nt * self.nta - elif not self.fix_gamma and self.fix_alpha: - return 1 + 2 * self.nt + 2 * self.nt * self.nta - elif self.fix_gamma and self.fix_alpha: - return 2 * self.nt + 2 * self.nt * self.nta - - @property - def gamma(self): - if self.fix_gamma: - return [] - else: - return [0] - - @property - def df(self): - if self.fix_gamma: - return list(range(self.nt)) - else: - return list(range(1, self.nt + 1)) - - @property - def db(self): - if self.fix_gamma: - return list(range(self.nt, 2 * self.nt)) - else: - return list(range(1 + self.nt, 1 + 2 * self.nt)) - - @property - def alpha(self): - if self.fix_alpha: - return [] - elif self.fix_gamma: - return list(range(2 * self.nt, 1 + 2 * self.nt + self.nx)) - elif not self.fix_gamma: - return list(range(1 + 2 * self.nt, 1 + 2 * self.nt + self.nx)) - - @property - def ta(self): - if self.nta == 0: - return np.zeros((self.nt, 2, 0)) - elif not self.fix_gamma and not self.fix_alpha: - arr = np.arange(1 + 2 * self.nt + self.nx, self.npar) - elif self.fix_gamma and not self.fix_alpha: - arr = np.arange(2 * self.nt + self.nx, self.npar) - elif not self.fix_gamma and self.fix_alpha: - arr = np.arange(1 + 2 * self.nt, self.npar) - elif self.fix_gamma and self.fix_alpha: - arr = np.arange(2 * self.nt, self.npar) - - return arr.reshape((self.nt, 2, self.nta), order="F") - - @property - def taf(self): - """ - Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. - ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) - """ - return self.ta[:, 0, :].flatten(order="C") - - @property - def tab(self): - """ - Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. 
- ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) - """ - return self.ta[:, 1, :].flatten(order="C") - - def get_ta_pars(self, pval): - if self.nta > 0: - if pval.ndim == 1: - return np.take_along_axis(pval[None, None], self.ta, axis=-1) - - else: - # assume shape is (a, npar) and returns shape (nt, 2, nta, a) - assert pval.shape[1] == self.npar and pval.ndim == 2 - return np.stack([self.get_ta_pars(v) for v in pval], axis=-1) - - else: - return np.zeros(shape=(self.nt, 2, 0)) - - def get_taf_pars(self, pval): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" - return self.get_ta_pars(pval=pval)[:, 0, :] - - def get_tab_pars(self, pval): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" - return self.get_ta_pars(pval=pval)[:, 1, :] - - def get_taf_values(self, pval, x, trans_att, axis=""): - """returns taf parameters of shape (nx, nt)""" - pval = np.atleast_2d(pval) - - assert pval.ndim == 2 and pval.shape[1] == self.npar - - arr_out = np.zeros((self.nx, self.nt)) - - if self.nta == 0: - pass - - elif axis == "": - assert pval.shape[0] == 1 - - arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, 1, nt) - - for tai, taxi in zip(arr, trans_att): - arr_out[x >= taxi] += tai - - elif axis == "x": - assert pval.shape[0] == self.nx - - arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, nx, nt) - - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (x, t) - arr_out[x >= taxi] += tai[x >= taxi] - - elif axis == "time": - assert pval.shape[0] == self.nt - - # arr (nt, nta, nt) to have shape (nta, nt, nt) - arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) - - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (t, t) - arr_out[x >= taxi] += np.diag(tai)[None] - - return arr_out - - def get_tab_values(self, pval, x, trans_att, axis=""): - """returns tab parameters of shape (nx, nt)""" - assert pval.shape[-1] == self.npar - - arr_out = np.zeros((self.nx, self.nt)) - - if self.nta == 0: - pass - - elif axis == "": - pval = np.squeeze(pval) - assert pval.ndim == 1 - arr = np.transpose(self.get_tab_pars(pval), axes=(1, 0)) - - for tai, taxi in zip(arr, trans_att): - arr_out[x < taxi] += tai - - elif axis == "x": - assert pval.ndim == 2 and pval.shape[0] == self.nx - - arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0)) - - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (x, t) - arr_out[x < taxi] += tai[x < taxi] - - elif axis == "time": - assert pval.ndim == 2 and pval.shape[0] == self.nt - - # arr (nt, nta, nt) to have shape (nta, nt, nt) - arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0)) - - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (t, t) - arr_out[x < taxi] += np.diag(tai) - - return arr_out - - -class ParameterIndexSingleEnded: - """ - if parameter fixed, they are not in - npar = 1 + 1 + nt + nta * nt - """ - - def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True): - assert not ( - includes_alpha and includes_dalpha - ), "Cannot hold both dalpha and alpha" - self.nt = nt - self.nx = nx - self.nta = nta - self.includes_alpha = includes_alpha - self.includes_dalpha = includes_dalpha - - @property - def all(self): - return np.concatenate( - (self.gamma, self.dalpha, self.alpha, self.c, self.ta.flatten(order="F")) - ) - - @property - def npar(self): - if self.includes_alpha: - return 1 + self.nx + self.nt + self.nta * self.nt - 
elif self.includes_dalpha: - return 1 + 1 + self.nt + self.nta * self.nt - else: - return 1 + self.nt + self.nta * self.nt - - @property - def gamma(self): - return [0] - - @property - def dalpha(self): - if self.includes_dalpha: - return [1] - else: - return [] - - @property - def alpha(self): - if self.includes_alpha: - return list(range(1, 1 + self.nx)) - else: - return [] - - @property - def c(self): - if self.includes_alpha: - return list(range(1 + self.nx, 1 + self.nx + self.nt)) - elif self.includes_dalpha: - return list(range(1 + 1, 1 + 1 + self.nt)) - else: - return list(range(1, 1 + self.nt)) - - @property - def taf(self): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" - # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') - # self["talpha"] = (('time', 'trans_att'), ta[:, :]) - if self.includes_alpha: - return np.arange( - 1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta - ).reshape((self.nt, self.nta), order="F") - elif self.includes_dalpha: - return np.arange( - 1 + 1 + self.nt, 1 + 1 + self.nt + self.nt * self.nta - ).reshape((self.nt, self.nta), order="F") - else: - return np.arange(1 + self.nt, 1 + self.nt + self.nt * self.nta).reshape( - (self.nt, self.nta), order="F" - ) - - def get_taf_pars(self, pval): - if self.nta > 0: - if pval.ndim == 1: - # returns shape (nta, nt) - assert len(pval) == self.npar, "Length of pval is incorrect" - return np.stack([pval[tafi] for tafi in self.taf.T]) - - else: - # assume shape is (a, npar) and returns shape (nta, nt, a) - assert pval.shape[1] == self.npar and pval.ndim == 2 - return np.stack([self.get_taf_pars(v) for v in pval], axis=-1) - - else: - return np.zeros(shape=(self.nt, 0)) - - def get_taf_values(self, pval, x, trans_att, axis=""): - """returns taf parameters of shape (nx, nt)""" - # assert pval.ndim == 2 and pval.shape[1] == self.npar - - arr_out = np.zeros((self.nx, self.nt)) - - if self.nta == 0: - pass - - elif axis == "": - pval = pval.flatten() - assert pval.shape == (self.npar,) - arr = self.get_taf_pars(pval) - assert arr.shape == ( - self.nta, - self.nt, - ) - - for tai, taxi in zip(arr, trans_att): - arr_out[x >= taxi] += tai - - elif axis == "x": - assert pval.shape == (self.nx, self.npar) - arr = self.get_taf_pars(pval) - assert arr.shape == (self.nta, self.nx, self.nt) - - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (x, t) - arr_out[x >= taxi] += tai[x >= taxi] - - elif axis == "time": - assert pval.shape == (self.nt, self.npar) - arr = self.get_taf_pars(pval) - assert arr.shape == (self.nta, self.nt, self.nt) - - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (t, t) - arr_out[x >= taxi] += np.diag(tai)[None] - - return arr_out diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index fe23f53c..5afb4fa0 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -3,6 +3,7 @@ from typing import Optional from typing import Union +import dask.array as da import matplotlib.pyplot as plt import numpy as np import numpy.typing as npt @@ -539,3 +540,603 @@ def suggest_cable_shift_double_ended( plt.tight_layout() return ishift1, ishift2 + + +def ufunc_per_section_helper( + sections=None, + func=None, + x_coords=None, + reference_dataset=None, + dataarray=None, + subtract_from_dataarray=None, + subtract_reference_from_dataarray=False, + ref_temp_broadcasted=False, + calc_per="stretch", + **func_kwargs, +): + """ + User function applied to parts of the cable. 
Super useful, + many options and slightly + complicated. + + The function `func` is taken over all the timesteps and calculated + per `calc_per`. This + is returned as a dictionary + + Parameters + ---------- + sections : Dict[str, List[slice]], optional + If `None` is supplied, `ds.sections` is used. Define calibration + sections. Each section requires a reference temperature time series, + such as the temperature measured by an external temperature sensor. + They should already be part of the DataStore object. `sections` + is defined with a dictionary with its keywords of the + names of the reference temperature time series. Its values are + lists of slice objects, where each slice object is a fiber stretch + that has the reference temperature. Afterwards, `sections` is stored + under `ds.sections`. + func : callable, str + A numpy function, or lambda function to apply to each 'calc_per'. + x_coords : xarray.DataArray, optional + x-coordinates, stored as ds.x. If supplied, returns the x-indices of + the reference sections. + reference_dataset : xarray.Dataset or Dict, optional + Contains the reference temperature timeseries refered to in `sections`. + Not required if `x_indices`. + dataarray : xarray.DataArray, optional + Pass your DataArray of which you want to compute the statistics. Has an + (x,) dimension or (x, time) dimensions. + subtract_from_dataarray : xarray.DataArray, optional + Pass your DataArray of which you want to subtract from `dataarray` before you + compute the statistics. Has an (x,) dimension or (x, time) dimensions. + subtract_reference_from_dataarray : bool + If True the reference temperature according to sections is subtracted from + dataarray before computing statistics + ref_temp_broadcasted : bool + Use if you want to return the reference temperature of shape of the reference + sections + calc_per : {'all', 'section', 'stretch'} + func_kwargs : dict + Dictionary with options that are passed to func + + Returns + ------- + + Examples + -------- + + 1. Calculate the variance of the residuals in the along ALL the\ + reference sections wrt the temperature of the water baths + + >>> tmpf_var = ufunc_per_section_helper( + >>> func='var', + >>> calc_per='all', + >>> dataarray=d['tmpf'], + >>> subtract_reference_from_dataarray=True) + + 2. Calculate the variance of the residuals in the along PER\ + reference section wrt the temperature of the water baths + + >>> tmpf_var = ufunc_per_section_helper + >>> func='var', + >>> calc_per='stretch', + >>> dataarray=d['tmpf'], + >>> subtract_reference_from_dataarray=True) + + 3. Calculate the variance of the residuals in the along PER\ + water bath wrt the temperature of the water baths + + >>> tmpf_var = ufunc_per_section_helper( + >>> func='var', + >>> calc_per='section', + >>> dataarray=d['tmpf'], + >>> subtract_reference_from_dataarray=True) + + 4. Obtain the coordinates of the measurements per section + + >>> locs = ufunc_per_section_helper( + >>> func=None, + >>> dataarray=d.x, + >>> subtract_reference_from_dataarray=False, + >>> ref_temp_broadcasted=False, + >>> calc_per='stretch') + + 5. Number of observations per stretch + + >>> nlocs = ufunc_per_section_helper( + >>> func=len, + >>> dataarray=d.x, + >>> subtract_reference_from_dataarray=False, + >>> ref_temp_broadcasted=False, + >>> calc_per='stretch') + + 6. broadcast the temperature of the reference sections to\ + stretch/section/all dimensions. The value of the reference\ + temperature (a timeseries) is broadcasted to the shape of self[\ + label]. 
The dataarray is not used for anything else. + + >>> temp_ref = ufunc_per_section_helper( + >>> dataarray=d["st"], + >>> ref_temp_broadcasted=True, + >>> calc_per='all') + + 7. x-coordinate index + + >>> ix_loc = ufunc_per_section_helperx_coords=d.x) + + + Note + ---- + If `dataarray` or `subtract_from_dataarray` is a Dask array, a Dask + array is returned else a numpy array is returned + """ + if not func: + + def func(a): + """ + + Parameters + ---------- + a + + Returns + ------- + + """ + return a + + elif isinstance(func, str) and func == "var": + + def func(a): + """ + + Parameters + ---------- + a + + Returns + ------- + + """ + return np.var(a, ddof=1) + + else: + assert callable(func) + + assert calc_per in ["all", "section", "stretch"] + + if x_coords is None and ( + (dataarray is not None and hasattr(dataarray.data, "chunks")) + or (subtract_from_dataarray and hasattr(subtract_from_dataarray.data, "chunks")) + ): + concat = da.concatenate + else: + concat = np.concatenate + + out = dict() + + for k, section in sections.items(): + out[k] = [] + for stretch in section: + if x_coords is not None: + # get indices from stretches + assert subtract_from_dataarray is None + assert not subtract_reference_from_dataarray + assert not ref_temp_broadcasted + # so it is slicable with x-indices + _x_indices = x_coords.astype(int) * 0 + np.arange(x_coords.size) + arg1 = _x_indices.sel(x=stretch).data + out[k].append(arg1) + + elif ( + subtract_from_dataarray is not None + and not subtract_reference_from_dataarray + and not ref_temp_broadcasted + ): + # calculate std wrt other series + arg1 = dataarray.sel(x=stretch).data + arg2 = subtract_from_dataarray.sel(x=stretch).data + out[k].append(arg1 - arg2) + + elif ( + subtract_from_dataarray is None + and subtract_reference_from_dataarray + and not ref_temp_broadcasted + ): + # calculate std wrt reference temperature of the corresponding bath + arg1 = dataarray.sel(x=stretch).data + arg2 = reference_dataset[k].data + out[k].append(arg1 - arg2) + + elif ( + subtract_from_dataarray is None + and not subtract_reference_from_dataarray + and ref_temp_broadcasted + ): + # Broadcast the reference temperature to the length of the stretch + arg1 = dataarray.sel(x=stretch).data + arg2 = da.broadcast_to(reference_dataset[k].data, arg1.shape) + out[k].append(arg2) + + elif ( + subtract_from_dataarray is None + and not subtract_reference_from_dataarray + and not ref_temp_broadcasted + ): + # calculate std wrt mean value + arg1 = dataarray.sel(x=stretch).data + out[k].append(arg1) + + if calc_per == "stretch": + out[k] = [func(argi, **func_kwargs) for argi in out[k]] + + elif calc_per == "section": + # flatten the out_dict to sort them + start = [i.start for i in section] + i_sorted = np.argsort(start) + out_flat_sort = [out[k][i] for i in i_sorted] + out[k] = func(concat(out_flat_sort), **func_kwargs) + + elif calc_per == "all": + pass + + if calc_per == "all": + # flatten the out_dict to sort them + start = [item.start for sublist in sections.values() for item in sublist] + i_sorted = np.argsort(start) + out_flat = [item for sublist in out.values() for item in sublist] + out_flat_sort = [out_flat[i] for i in i_sorted] + out = func(concat(out_flat_sort, axis=0), **func_kwargs) + + if hasattr(out, "chunks") and len(out.chunks) > 0 and "x" in dataarray.dims: + # also sum the chunksize in the x dimension + # first find out where the x dim is + ixdim = dataarray.dims.index("x") + c_old = out.chunks + c_new = list(c_old) + c_new[ixdim] = sum(c_old[ixdim]) + out = 
out.rechunk(c_new) + + return out + + +class ParameterIndexDoubleEnded: + """ + npar = 1 + 2 * nt + nx + 2 * nt * nta + assert pval.size == npar + assert p_var.size == npar + if calc_cov: + assert p_cov.shape == (npar, npar) + gamma = pval[0] + d_fw = pval[1:nt + 1] + d_bw = pval[1 + nt:1 + 2 * nt] + alpha = pval[1 + 2 * nt:1 + 2 * nt + nx] + # store calibration parameters in DataStore + self["gamma"] = (tuple(), gamma) + self["alpha"] = (('x',), alpha) + self["df"] = (('time',), d_fw) + self["db"] = (('time',), d_bw) + if nta > 0: + ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) + """ + + def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): + self.nt = nt + self.nx = nx + self.nta = nta + self.fix_gamma = fix_gamma + self.fix_alpha = fix_alpha + + @property + def all(self): + return np.concatenate( + (self.gamma, self.df, self.db, self.alpha, self.ta.flatten(order="F")) + ) + + @property + def npar(self): + if not self.fix_gamma and not self.fix_alpha: + return 1 + 2 * self.nt + self.nx + 2 * self.nt * self.nta + elif self.fix_gamma and not self.fix_alpha: + return 2 * self.nt + self.nx + 2 * self.nt * self.nta + elif not self.fix_gamma and self.fix_alpha: + return 1 + 2 * self.nt + 2 * self.nt * self.nta + elif self.fix_gamma and self.fix_alpha: + return 2 * self.nt + 2 * self.nt * self.nta + + @property + def gamma(self): + if self.fix_gamma: + return [] + else: + return [0] + + @property + def df(self): + if self.fix_gamma: + return list(range(self.nt)) + else: + return list(range(1, self.nt + 1)) + + @property + def db(self): + if self.fix_gamma: + return list(range(self.nt, 2 * self.nt)) + else: + return list(range(1 + self.nt, 1 + 2 * self.nt)) + + @property + def alpha(self): + if self.fix_alpha: + return [] + elif self.fix_gamma: + return list(range(2 * self.nt, 1 + 2 * self.nt + self.nx)) + elif not self.fix_gamma: + return list(range(1 + 2 * self.nt, 1 + 2 * self.nt + self.nx)) + + @property + def ta(self): + if self.nta == 0: + return np.zeros((self.nt, 2, 0)) + elif not self.fix_gamma and not self.fix_alpha: + arr = np.arange(1 + 2 * self.nt + self.nx, self.npar) + elif self.fix_gamma and not self.fix_alpha: + arr = np.arange(2 * self.nt + self.nx, self.npar) + elif not self.fix_gamma and self.fix_alpha: + arr = np.arange(1 + 2 * self.nt, self.npar) + elif self.fix_gamma and self.fix_alpha: + arr = np.arange(2 * self.nt, self.npar) + + return arr.reshape((self.nt, 2, self.nta), order="F") + + @property + def taf(self): + """ + Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. + ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) + """ + return self.ta[:, 0, :].flatten(order="C") + + @property + def tab(self): + """ + Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. 
+ ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) + """ + return self.ta[:, 1, :].flatten(order="C") + + def get_ta_pars(self, pval): + if self.nta > 0: + if pval.ndim == 1: + return np.take_along_axis(pval[None, None], self.ta, axis=-1) + + else: + # assume shape is (a, npar) and returns shape (nt, 2, nta, a) + assert pval.shape[1] == self.npar and pval.ndim == 2 + return np.stack([self.get_ta_pars(v) for v in pval], axis=-1) + + else: + return np.zeros(shape=(self.nt, 2, 0)) + + def get_taf_pars(self, pval): + """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + return self.get_ta_pars(pval=pval)[:, 0, :] + + def get_tab_pars(self, pval): + """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + return self.get_ta_pars(pval=pval)[:, 1, :] + + def get_taf_values(self, pval, x, trans_att, axis=""): + """returns taf parameters of shape (nx, nt)""" + pval = np.atleast_2d(pval) + + assert pval.ndim == 2 and pval.shape[1] == self.npar + + arr_out = np.zeros((self.nx, self.nt)) + + if self.nta == 0: + pass + + elif axis == "": + assert pval.shape[0] == 1 + + arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, 1, nt) + + for tai, taxi in zip(arr, trans_att): + arr_out[x >= taxi] += tai + + elif axis == "x": + assert pval.shape[0] == self.nx + + arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, nx, nt) + + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (x, t) + arr_out[x >= taxi] += tai[x >= taxi] + + elif axis == "time": + assert pval.shape[0] == self.nt + + # arr (nt, nta, nt) to have shape (nta, nt, nt) + arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) + + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (t, t) + arr_out[x >= taxi] += np.diag(tai)[None] + + return arr_out + + def get_tab_values(self, pval, x, trans_att, axis=""): + """returns tab parameters of shape (nx, nt)""" + assert pval.shape[-1] == self.npar + + arr_out = np.zeros((self.nx, self.nt)) + + if self.nta == 0: + pass + + elif axis == "": + pval = np.squeeze(pval) + assert pval.ndim == 1 + arr = np.transpose(self.get_tab_pars(pval), axes=(1, 0)) + + for tai, taxi in zip(arr, trans_att): + arr_out[x < taxi] += tai + + elif axis == "x": + assert pval.ndim == 2 and pval.shape[0] == self.nx + + arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0)) + + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (x, t) + arr_out[x < taxi] += tai[x < taxi] + + elif axis == "time": + assert pval.ndim == 2 and pval.shape[0] == self.nt + + # arr (nt, nta, nt) to have shape (nta, nt, nt) + arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0)) + + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (t, t) + arr_out[x < taxi] += np.diag(tai) + + return arr_out + + +class ParameterIndexSingleEnded: + """ + if parameter fixed, they are not in + npar = 1 + 1 + nt + nta * nt + """ + + def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True): + assert not ( + includes_alpha and includes_dalpha + ), "Cannot hold both dalpha and alpha" + self.nt = nt + self.nx = nx + self.nta = nta + self.includes_alpha = includes_alpha + self.includes_dalpha = includes_dalpha + + @property + def all(self): + return np.concatenate( + (self.gamma, self.dalpha, self.alpha, self.c, self.ta.flatten(order="F")) + ) + + @property + def npar(self): + if self.includes_alpha: + return 1 + self.nx + self.nt + self.nta * self.nt + 
elif self.includes_dalpha: + return 1 + 1 + self.nt + self.nta * self.nt + else: + return 1 + self.nt + self.nta * self.nt + + @property + def gamma(self): + return [0] + + @property + def dalpha(self): + if self.includes_dalpha: + return [1] + else: + return [] + + @property + def alpha(self): + if self.includes_alpha: + return list(range(1, 1 + self.nx)) + else: + return [] + + @property + def c(self): + if self.includes_alpha: + return list(range(1 + self.nx, 1 + self.nx + self.nt)) + elif self.includes_dalpha: + return list(range(1 + 1, 1 + 1 + self.nt)) + else: + return list(range(1, 1 + self.nt)) + + @property + def taf(self): + """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') + # self["talpha"] = (('time', 'trans_att'), ta[:, :]) + if self.includes_alpha: + return np.arange( + 1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta + ).reshape((self.nt, self.nta), order="F") + elif self.includes_dalpha: + return np.arange( + 1 + 1 + self.nt, 1 + 1 + self.nt + self.nt * self.nta + ).reshape((self.nt, self.nta), order="F") + else: + return np.arange(1 + self.nt, 1 + self.nt + self.nt * self.nta).reshape( + (self.nt, self.nta), order="F" + ) + + def get_taf_pars(self, pval): + if self.nta > 0: + if pval.ndim == 1: + # returns shape (nta, nt) + assert len(pval) == self.npar, "Length of pval is incorrect" + return np.stack([pval[tafi] for tafi in self.taf.T]) + + else: + # assume shape is (a, npar) and returns shape (nta, nt, a) + assert pval.shape[1] == self.npar and pval.ndim == 2 + return np.stack([self.get_taf_pars(v) for v in pval], axis=-1) + + else: + return np.zeros(shape=(self.nt, 0)) + + def get_taf_values(self, pval, x, trans_att, axis=""): + """returns taf parameters of shape (nx, nt)""" + # assert pval.ndim == 2 and pval.shape[1] == self.npar + + arr_out = np.zeros((self.nx, self.nt)) + + if self.nta == 0: + pass + + elif axis == "": + pval = pval.flatten() + assert pval.shape == (self.npar,) + arr = self.get_taf_pars(pval) + assert arr.shape == ( + self.nta, + self.nt, + ) + + for tai, taxi in zip(arr, trans_att): + arr_out[x >= taxi] += tai + + elif axis == "x": + assert pval.shape == (self.nx, self.npar) + arr = self.get_taf_pars(pval) + assert arr.shape == (self.nta, self.nx, self.nt) + + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (x, t) + arr_out[x >= taxi] += tai[x >= taxi] + + elif axis == "time": + assert pval.shape == (self.nt, self.npar) + arr = self.get_taf_pars(pval) + assert arr.shape == (self.nta, self.nt, self.nt) + + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (t, t) + arr_out[x >= taxi] += np.diag(tai)[None] + + return arr_out From e64418aa309ac8797eb2f0a28a5f22b57c79fb66 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 6 Aug 2023 17:37:48 +0200 Subject: [PATCH 19/26] Refactored calibration_single_ended() and calibration_double_ended() --- src/dtscalibration/calibrate_utils.py | 758 ++++++++++++++++++++++ src/dtscalibration/datastore.py | 880 ++------------------------ 2 files changed, 822 insertions(+), 816 deletions(-) diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index 66047af1..466d3d2a 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -46,6 +46,147 @@ def parse_st_var(ds, st_var, st_label="st"): return st_var_sec +def calibration_single_ended_helper( + self, + st_var, + ast_var, + fix_alpha, + fix_dalpha, + fix_gamma, + 
matching_indices, + nt, + nta, + nx, + solver, +): + """Only used in `calibration_single_ended()`""" + for input_item in [st_var, ast_var]: + assert input_item is not None, ( + "For wls define all " "variances (`st_var`, " "`ast_var`) " + ) + calc_cov = True + split = calibration_single_ended_solver( + self, + st_var, + ast_var, + calc_cov=calc_cov, + solver="external_split", + matching_indices=matching_indices, + ) + y = split["y"] + w = split["w"] + + # Stack all X's + if fix_alpha: + assert not fix_dalpha, "Use either `fix_dalpha` or `fix_alpha`" + assert fix_alpha[0].size == self.x.size, ( + "fix_alpha also needs to be defined outside the reference " "sections" + ) + assert fix_alpha[1].size == self.x.size, ( + "fix_alpha also needs to be defined outside the reference " "sections" + ) + p_val = split["p0_est_alpha"].copy() + + if np.any(matching_indices): + raise NotImplementedError( + "Configuring fix_alpha and matching sections requires extra code" + ) + + X = sp.hstack( + (split["X_gamma"], split["X_alpha"], split["X_c"], split["X_TA"]) + ).tocsr() + ip_use = list(range(1 + nx + nt + nta * nt)) + + else: + X = sp.vstack( + ( + sp.hstack( + ( + split["X_gamma"], + split["X_dalpha"], + split["X_c"], + split["X_TA"], + ) + ), + split["X_m"], + ) + ).tocsr() + p_val = split["p0_est_dalpha"].copy() + ip_use = list(range(1 + 1 + nt + nta * nt)) + p_var = np.zeros_like(p_val) + p_cov = np.zeros((p_val.size, p_val.size), dtype=float) + if fix_gamma is not None: + ip_remove = [0] + ip_use = [i for i in ip_use if i not in ip_remove] + p_val[ip_remove] = fix_gamma[0] + p_var[ip_remove] = fix_gamma[1] + + X_gamma = ( + sp.vstack((split["X_gamma"], split["X_m"].tocsr()[:, 0].tocoo())) + .toarray() + .flatten() + ) + + y -= fix_gamma[0] * X_gamma + w = 1 / (1 / w + fix_gamma[1] * X_gamma) + if fix_alpha is not None: + ip_remove = list(range(1, nx + 1)) + ip_use = [i for i in ip_use if i not in ip_remove] + p_val[ip_remove] = fix_alpha[0] + p_var[ip_remove] = fix_alpha[1] + + # X_alpha needs to be vertically extended to support matching sections + y -= split["X_alpha"].dot(fix_alpha[0]) + w = 1 / (1 / w + split["X_alpha"].dot(fix_alpha[1])) + if fix_dalpha is not None: + ip_remove = [1] + ip_use = [i for i in ip_use if i not in ip_remove] + p_val[ip_remove] = fix_dalpha[0] + p_var[ip_remove] = fix_dalpha[1] + + y -= np.hstack( + ( + fix_dalpha[0] * split["X_dalpha"].toarray().flatten(), + ( + fix_dalpha[0] + * split["X_m"].tocsr()[:, 1].tocoo().toarray().flatten() + ), + ) + ) + w = 1 / ( + 1 / w + + np.hstack( + ( + fix_dalpha[1] * split["X_dalpha"].toarray().flatten(), + ( + fix_dalpha[1] + * split["X_m"].tocsr()[:, 1].tocoo().toarray().flatten() + ), + ) + ) + ) + if solver == "sparse": + out = wls_sparse( + X[:, ip_use], + y, + w=w, + x0=p_val[ip_use], + calc_cov=calc_cov, + verbose=False, + ) + + elif solver == "stats": + out = wls_stats(X[:, ip_use], y, w=w, calc_cov=calc_cov, verbose=False) + + else: + assert 0, "Unknown solver" + p_val[ip_use] = out[0] + p_var[ip_use] = out[1] + np.fill_diagonal(p_cov, p_var) # set variance of all fixed params + p_cov[np.ix_(ip_use, ip_use)] = out[2] + return p_cov, p_val, p_var + + def calibration_single_ended_solver( # noqa: MC0001 ds, st_var=None, @@ -342,6 +483,623 @@ def calibration_single_ended_solver( # noqa: MC0001 return (p_sol, p_var, p_cov) if calc_cov else (p_sol, p_var) +def calibration_double_ended_helper( + self, + st_var, + ast_var, + rst_var, + rast_var, + fix_alpha, + fix_gamma, + nt, + nta, + nx, + nx_sec, + ix_sec, + matching_indices, + 
solver, + verbose, +): + if fix_alpha or fix_gamma: + split = calibration_double_ended_solver( + self, + st_var, + ast_var, + rst_var, + rast_var, + calc_cov=True, + solver="external_split", + matching_indices=matching_indices, + verbose=verbose, + ) + else: + out = calibration_double_ended_solver( + self, + st_var, + ast_var, + rst_var, + rast_var, + calc_cov=True, + solver=solver, + matching_indices=matching_indices, + verbose=verbose, + ) + + p_val, p_var, p_cov = out + # adjust split to fix parameters + """Wrapped in a function to reduce memory usage. + Constructing: + Z_gamma (nt * nx, 1). Data: positive 1/temp + Z_D (nt * nx, nt). Data: ones + E (nt * nx, nx). Data: ones + Zero_gamma (nt * nx, 1) + zero_d (nt * nx, nt) + Z_TA_fw (nt * nx, nta * 2 * nt) minus ones + Z_TA_bw (nt * nx, nta * 2 * nt) minus ones + Z_TA_E (nt * nx, nta * 2 * nt) + I_fw = 1/Tref*gamma - D_fw - E - TA_fw + I_bw = 1/Tref*gamma - D_bw + E - TA_bw + (I_bw - I_fw) / 2 = D_fw/2 - D_bw/2 + E + TA_fw/2 - TA_bw/2 Eq42 + """ + if fix_alpha and fix_gamma: + assert np.size(fix_alpha[0]) == self.x.size, "define alpha for each location" + assert ( + np.size(fix_alpha[1]) == self.x.size + ), "define var alpha for each location" + m = ( + "The integrated differential attenuation is zero at the " + "first index of the reference sections." + ) + assert np.abs(fix_alpha[0][ix_sec[0]]) < 1e-8, m + # The array with the integrated differential att is termed E + + if np.any(matching_indices): + n_E_in_cal = split["ix_from_cal_match_to_glob"].size + p0_est = np.concatenate( + ( + split["p0_est"][1 : 1 + 2 * nt], + split["p0_est"][1 + 2 * nt + n_E_in_cal :], + ) + ) + X_E1 = sp.csr_matrix(([], ([], [])), shape=(nt * nx_sec, self.x.size)) + X_E1[:, ix_sec[1:]] = split["E"] + X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] + X_E = sp.vstack( + ( + -X_E2, + X_E2, + split["E_match_F"], + split["E_match_B"], + split["E_match_no_cal"], + ) + ) + + X_gamma = ( + sp.vstack( + ( + split["Z_gamma"], + split["Z_gamma"], + split["Zero_eq12_gamma"], + split["Zero_eq12_gamma"], + split["Zero_eq3_gamma"], + ) + ) + .toarray() + .flatten() + ) + + X = sp.vstack( + ( + sp.hstack((-split["Z_D"], split["Zero_d"], split["Z_TA_fw"])), + sp.hstack((split["Zero_d"], -split["Z_D"], split["Z_TA_bw"])), + sp.hstack((split["Zero_d_eq12"], split["Z_TA_eq1"])), + sp.hstack((split["Zero_d_eq12"], split["Z_TA_eq2"])), + sp.hstack((split["d_no_cal"], split["Z_TA_eq3"])), + ) + ) + + y = np.concatenate( + ( + split["y_F"], + split["y_B"], + split["y_eq1"], + split["y_eq2"], + split["y_eq3"], + ) + ) + y -= X_E.dot(fix_alpha[0][split["ix_from_cal_match_to_glob"]]) + y -= fix_gamma[0] * X_gamma + + # variances are added. 
weight is the inverse of the variance + # of the observations + w_ = np.concatenate( + ( + split["w_F"], + split["w_B"], + split["w_eq1"], + split["w_eq2"], + split["w_eq3"], + ) + ) + w = 1 / ( + 1 / w_ + + X_E.dot(fix_alpha[1][split["ix_from_cal_match_to_glob"]]) + + fix_gamma[1] * X_gamma + ) + + else: + # X_gamma + X_E = sp.vstack((-split["E"], split["E"])) + X_gamma = ( + sp.vstack((split["Z_gamma"], split["Z_gamma"])).toarray().flatten() + ) + # Use only the remaining coefficients + # Stack all X's + X = sp.vstack( + ( + sp.hstack((-split["Z_D"], split["Zero_d"], split["Z_TA_fw"])), + sp.hstack((split["Zero_d"], -split["Z_D"], split["Z_TA_bw"])), + ) + ) + + # Move the coefficients times the fixed gamma to the + # observations + y = np.concatenate((split["y_F"], split["y_B"])) + y -= X_E.dot(fix_alpha[0][ix_sec[1:]]) + y -= fix_gamma[0] * X_gamma + # variances are added. weight is the inverse of the variance + # of the observations + w_ = np.concatenate((split["w_F"], split["w_B"])) + w = 1 / ( + 1 / w_ + X_E.dot(fix_alpha[1][ix_sec[1:]]) + fix_gamma[1] * X_gamma + ) + + # [C_1, C_2, .., C_nt, TA_fw_a_1, TA_fw_a_2, TA_fw_a_nt, + # TA_bw_a_1, TA_bw_a_2, TA_bw_a_nt] Then continues with + # TA for connector b. + p0_est = np.concatenate( + ( + split["p0_est"][1 : 1 + 2 * nt], + split["p0_est"][1 + 2 * nt + nx_sec - 1 :], + ) + ) + + if solver == "sparse": + out = wls_sparse(X, y, w=w, x0=p0_est, calc_cov=True, verbose=False) + + elif solver == "stats": + out = wls_stats(X, y, w=w, calc_cov=True, verbose=False) + + # Added fixed gamma and its variance to the solution + p_val = np.concatenate( + ([fix_gamma[0]], out[0][: 2 * nt], fix_alpha[0], out[0][2 * nt :]) + ) + p_var = np.concatenate( + ([fix_gamma[1]], out[1][: 2 * nt], fix_alpha[1], out[1][2 * nt :]) + ) + + # whether it returns a copy or a view depends on what + # version of numpy you are using + p_cov = np.diag(p_var).copy() + from_i = np.concatenate( + ( + np.arange(1, 2 * nt + 1), + np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), + ) + ) + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") + p_cov[iox_sec1, iox_sec2] = out[2] + + elif fix_gamma: + if np.any(matching_indices): + # n_E_in_cal = split['ix_from_cal_match_to_glob'].size + p0_est = split["p0_est"][1:] + X_E1 = sp.csr_matrix(([], ([], [])), shape=(nt * nx_sec, self.x.size)) + from_i = ix_sec[1:] + X_E1[:, from_i] = split["E"] + X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] + X = sp.vstack( + ( + sp.hstack( + ( + -split["Z_D"], + split["Zero_d"], + -X_E2, + split["Z_TA_fw"], + ) + ), + sp.hstack((split["Zero_d"], -split["Z_D"], X_E2, split["Z_TA_bw"])), + sp.hstack( + ( + split["Zero_d_eq12"], + split["E_match_F"], + split["Z_TA_eq1"], + ) + ), + sp.hstack( + ( + split["Zero_d_eq12"], + split["E_match_B"], + split["Z_TA_eq2"], + ) + ), + sp.hstack( + ( + split["d_no_cal"], + split["E_match_no_cal"], + split["Z_TA_eq3"], + ) + ), + ) + ) + X_gamma = ( + sp.vstack( + ( + split["Z_gamma"], + split["Z_gamma"], + split["Zero_eq12_gamma"], + split["Zero_eq12_gamma"], + split["Zero_eq3_gamma"], + ) + ) + .toarray() + .flatten() + ) + + y = np.concatenate( + ( + split["y_F"], + split["y_B"], + split["y_eq1"], + split["y_eq2"], + split["y_eq3"], + ) + ) + y -= fix_gamma[0] * X_gamma + + # variances are added. 
weight is the inverse of the variance + # of the observations + w_ = np.concatenate( + ( + split["w_F"], + split["w_B"], + split["w_eq1"], + split["w_eq2"], + split["w_eq3"], + ) + ) + w = 1 / (1 / w_ + fix_gamma[1] * X_gamma) + + else: + X_gamma = ( + sp.vstack((split["Z_gamma"], split["Z_gamma"])).toarray().flatten() + ) + # Use only the remaining coefficients + X = sp.vstack( + ( + sp.hstack( + ( + -split["Z_D"], + split["Zero_d"], + -split["E"], + split["Z_TA_fw"], + ) + ), + sp.hstack( + ( + split["Zero_d"], + -split["Z_D"], + split["E"], + split["Z_TA_bw"], + ) + ), + ) + ) + # Move the coefficients times the fixed gamma to the + # observations + y = np.concatenate((split["y_F"], split["y_B"])) + y -= fix_gamma[0] * X_gamma + # variances are added. weight is the inverse of the variance + # of the observations + w_ = np.concatenate((split["w_F"], split["w_B"])) + w = 1 / (1 / w_ + fix_gamma[1] * X_gamma) + + p0_est = split["p0_est"][1:] + + if solver == "sparse": + out = wls_sparse(X, y, w=w, x0=p0_est, calc_cov=True, verbose=False) + + elif solver == "stats": + out = wls_stats(X, y, w=w, calc_cov=True, verbose=False) + + # put E outside of reference section in solution + # concatenating makes a copy of the data instead of using a + # pointer + ds_sub = self[["st", "ast", "rst", "rast", "trans_att"]] + ds_sub["df"] = (("time",), out[0][:nt]) + ds_sub["df_var"] = (("time",), out[1][:nt]) + ds_sub["db"] = (("time",), out[0][nt : 2 * nt]) + ds_sub["db_var"] = (("time",), out[1][nt : 2 * nt]) + + if nta > 0: + if np.any(matching_indices): + n_E_in_cal = split["ix_from_cal_match_to_glob"].size + ta = out[0][2 * nt + n_E_in_cal :].reshape((nt, 2, nta), order="F") + ta_var = out[1][2 * nt + n_E_in_cal :].reshape((nt, 2, nta), order="F") + + else: + ta = out[0][2 * nt + nx_sec - 1 :].reshape((nt, 2, nta), order="F") + ta_var = out[1][2 * nt + nx_sec - 1 :].reshape((nt, 2, nta), order="F") + + talpha_fw = ta[:, 0, :] + talpha_bw = ta[:, 1, :] + talpha_fw_var = ta_var[:, 0, :] + talpha_bw_var = ta_var[:, 1, :] + else: + talpha_fw = None + talpha_bw = None + talpha_fw_var = None + talpha_bw_var = None + + E_all_exact, E_all_var_exact = calc_alpha_double( + mode="exact", + ds=ds_sub, + st_var=st_var, + ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, + ix_alpha_is_zero=ix_sec[0], + talpha_fw=talpha_fw, + talpha_bw=talpha_bw, + talpha_fw_var=talpha_fw_var, + talpha_bw_var=talpha_bw_var, + ) + + if not np.any(matching_indices): + # Added fixed gamma and its variance to the solution. And + # expand to include locations outside reference sections. + p_val = np.concatenate( + ( + [fix_gamma[0]], + out[0][: 2 * nt], + E_all_exact, + out[0][2 * nt + nx_sec - 1 :], + ) + ) + p_val[1 + 2 * nt + ix_sec[1:]] = out[0][2 * nt : 2 * nt + nx_sec - 1] + p_val[1 + 2 * nt + ix_sec[0]] = 0.0 + p_var = np.concatenate( + ( + [fix_gamma[1]], + out[1][: 2 * nt], + E_all_var_exact, + out[1][2 * nt + nx_sec - 1 :], + ) + ) + p_var[1 + 2 * nt + ix_sec[1:]] = out[1][2 * nt : 2 * nt + nx_sec - 1] + else: + n_E_in_cal = split["ix_from_cal_match_to_glob"].size + + # Added fixed gamma and its variance to the solution. And + # expand to include locations outside reference sections. 
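# Worked layout sketch with toy sizes (assumed for illustration only): for
# nt=2 time steps, nx=3 locations and nta=1 transient-attenuation location,
# the full double-ended parameter vector assembled below has
#   npar = 1 + 2*nt + nx + 2*nt*nta = 1 + 4 + 3 + 4 = 12
# entries ordered [gamma, d_fw (nt), d_bw (nt), alpha (nx), ta (2*nt*nta)].
import numpy as np

nt_ex, nx_ex, nta_ex = 2, 3, 1
p_ex = np.arange(1 + 2 * nt_ex + nx_ex + 2 * nt_ex * nta_ex)
gamma_ex = p_ex[0]                                        # index 0
d_fw_ex = p_ex[1 : 1 + nt_ex]                             # indices 1..2
d_bw_ex = p_ex[1 + nt_ex : 1 + 2 * nt_ex]                 # indices 3..4
alpha_ex = p_ex[1 + 2 * nt_ex : 1 + 2 * nt_ex + nx_ex]    # indices 5..7
ta_ex = p_ex[1 + 2 * nt_ex + nx_ex :].reshape((nt_ex, 2, nta_ex), order="F")
assert ta_ex.shape == (nt_ex, 2, nta_ex)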
+ p_val = np.concatenate( + ( + [fix_gamma[0]], + out[0][: 2 * nt], + E_all_exact, + out[0][2 * nt + n_E_in_cal :], + ) + ) + p_val[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = out[0][ + 2 * nt : 2 * nt + n_E_in_cal + ] + p_val[1 + 2 * nt + ix_sec[0]] = 0.0 + p_var = np.concatenate( + ( + [fix_gamma[1]], + out[1][: 2 * nt], + E_all_var_exact, + out[1][2 * nt + n_E_in_cal :], + ) + ) + p_var[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = out[1][ + 2 * nt : 2 * nt + n_E_in_cal + ] + + p_cov = np.diag(p_var).copy() + + if not np.any(matching_indices): + from_i = np.concatenate( + ( + np.arange(1, 2 * nt + 1), + 2 * nt + 1 + ix_sec[1:], + np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), + ) + ) + else: + from_i = np.concatenate( + ( + np.arange(1, 2 * nt + 1), + 2 * nt + 1 + split["ix_from_cal_match_to_glob"], + np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), + ) + ) + + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") + p_cov[iox_sec1, iox_sec2] = out[2] + + elif fix_alpha: + assert np.size(fix_alpha[0]) == self.x.size, "define alpha for each location" + assert ( + np.size(fix_alpha[1]) == self.x.size + ), "define var alpha for each location" + m = ( + "The integrated differential attenuation is zero at the " + "first index of the reference sections." + ) + assert np.abs(fix_alpha[0][ix_sec[0]]) < 1e-6, m + # The array with the integrated differential att is termed E + + if not np.any(matching_indices): + # X_gamma + X_E = sp.vstack((-split["E"], split["E"])) + # Use only the remaining coefficients + # Stack all X's + X = sp.vstack( + ( + sp.hstack( + ( + split["Z_gamma"], + -split["Z_D"], + split["Zero_d"], + split["Z_TA_fw"], + ) + ), + sp.hstack( + ( + split["Z_gamma"], + split["Zero_d"], + -split["Z_D"], + split["Z_TA_bw"], + ) + ), + ) + ) + # Move the coefficients times the fixed gamma to the + # observations + y = np.concatenate((split["y_F"], split["y_B"])) + y -= X_E.dot(fix_alpha[0][ix_sec[1:]]) + + # variances are added. weight is the inverse of the variance + # of the observations + w_ = np.concatenate((split["w_F"], split["w_B"])) + w = 1 / (1 / w_ + X_E.dot(fix_alpha[1][ix_sec[1:]])) + + p0_est = np.concatenate( + ( + split["p0_est"][: 1 + 2 * nt], + split["p0_est"][1 + 2 * nt + nx_sec - 1 :], + ) + ) + + else: + n_E_in_cal = split["ix_from_cal_match_to_glob"].size + p0_est = np.concatenate( + ( + split["p0_est"][: 1 + 2 * nt], + split["p0_est"][1 + 2 * nt + n_E_in_cal :], + ) + ) + X_E1 = sp.csr_matrix(([], ([], [])), shape=(nt * nx_sec, self.x.size)) + X_E1[:, ix_sec[1:]] = split["E"] + X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] + X_E = sp.vstack( + ( + -X_E2, + X_E2, + split["E_match_F"], + split["E_match_B"], + split["E_match_no_cal"], + ) + ) + + X = sp.vstack( + ( + sp.hstack( + ( + split["Z_gamma"], + -split["Z_D"], + split["Zero_d"], + split["Z_TA_fw"], + ) + ), + sp.hstack( + ( + split["Z_gamma"], + split["Zero_d"], + -split["Z_D"], + split["Z_TA_bw"], + ) + ), + sp.hstack( + ( + split["Zero_eq12_gamma"], + split["Zero_d_eq12"], + split["Z_TA_eq1"], + ) + ), + sp.hstack( + ( + split["Zero_eq12_gamma"], + split["Zero_d_eq12"], + split["Z_TA_eq2"], + ) + ), + sp.hstack( + ( + split["Zero_eq3_gamma"], + split["d_no_cal"], + split["Z_TA_eq3"], + ) + ), + ) + ) + + y = np.concatenate( + ( + split["y_F"], + split["y_B"], + split["y_eq1"], + split["y_eq2"], + split["y_eq3"], + ) + ) + y -= X_E.dot(fix_alpha[0][split["ix_from_cal_match_to_glob"]]) + + # variances are added. 
weight is the inverse of the variance + # of the observations + w_ = np.concatenate( + ( + split["w_F"], + split["w_B"], + split["w_eq1"], + split["w_eq2"], + split["w_eq3"], + ) + ) + w = 1 / (1 / w_ + X_E.dot(fix_alpha[1][split["ix_from_cal_match_to_glob"]])) + + if solver == "sparse": + out = wls_sparse(X, y, w=w, x0=p0_est, calc_cov=True, verbose=False) + + elif solver == "stats": + out = wls_stats(X, y, w=w, calc_cov=True, verbose=False) + + # Added fixed gamma and its variance to the solution + p_val = np.concatenate( + (out[0][: 1 + 2 * nt], fix_alpha[0], out[0][1 + 2 * nt :]) + ) + p_var = np.concatenate( + (out[1][: 1 + 2 * nt], fix_alpha[1], out[1][1 + 2 * nt :]) + ) + + p_cov = np.diag(p_var).copy() + + from_i = np.concatenate( + ( + np.arange(1 + 2 * nt), + np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), + ) + ) + + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") + p_cov[iox_sec1, iox_sec2] = out[2] + + else: + pass + return p_cov, p_val, p_var + + def calibration_double_ended_solver( # noqa: MC0001 ds, st_var=None, diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 320e0998..3323c414 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -13,16 +13,11 @@ from scipy.optimize import minimize from scipy.sparse import linalg as ln -from dtscalibration.calibrate_utils import calc_alpha_double -from dtscalibration.calibrate_utils import calibration_double_ended_solver -from dtscalibration.calibrate_utils import calibration_single_ended_solver from dtscalibration.calibrate_utils import match_sections from dtscalibration.calibrate_utils import parse_st_var -from dtscalibration.calibrate_utils import wls_sparse -from dtscalibration.calibrate_utils import wls_stats -from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.datastore_utils import ParameterIndexDoubleEnded from dtscalibration.datastore_utils import ParameterIndexSingleEnded +from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.datastore_utils import ufunc_per_section_helper from dtscalibration.io_utils import _dim_attrs @@ -284,6 +279,42 @@ def validate_sections(self, sections: Dict[str, List[slice]]): return sections_fix_slice_fixed + def check_reference_section_values(self): + """ + Checks if the values of the used sections are of the right datatype + (floats), if there are finite number (no NaN/inf), and if the time + dimension corresponds with the time dimension of the st/ast data. + + Parameters + ---------- + + Returns + ------- + + """ + for key in self.sections.keys(): + if not np.issubdtype(self[key].dtype, np.floating): + raise ValueError( + 'Data of reference temperature "' + + key + + '" does not have a float data type. Please ensure that ' + "the data is of a valid type (e.g. np.float32)" + ) + + if np.any(~np.isfinite(self[key].values)): + raise ValueError( + 'NaN/inf value(s) found in reference temperature "' + key + '"' + ) + + if self[key].dims != ("time",): + raise ValueError( + "Time dimension of the reference temperature timeseries " + + key + + "is not the same as the time dimension" + + " of the Stokes measurement. 
See examples/notebooks/09" + + "Import_timeseries.ipynb for more info" + ) + @property def is_double_ended(self) -> float: """ @@ -1720,42 +1751,6 @@ def set_trans_att(self, trans_att=None): self.trans_att.attrs = dim_attrs["trans_att"] pass - def check_reference_section_values(self): - """ - Checks if the values of the used sections are of the right datatype - (floats), if there are finite number (no NaN/inf), and if the time - dimension corresponds with the time dimension of the st/ast data. - - Parameters - ---------- - - Returns - ------- - - """ - for key in self.sections.keys(): - if not np.issubdtype(self[key].dtype, np.floating): - raise ValueError( - 'Data of reference temperature "' - + key - + '" does not have a float data type. Please ensure that ' - "the data is of a valid type (e.g. np.float32)" - ) - - if np.any(~np.isfinite(self[key].values)): - raise ValueError( - 'NaN/inf value(s) found in reference temperature "' + key + '"' - ) - - if self[key].dims != ("time",): - raise ValueError( - "Time dimension of the reference temperature timeseries " - + key - + "is not the same as the time dimension" - + " of the Stokes measurement. See examples/notebooks/09" - + "Import_timeseries.ipynb for more info" - ) - def calibration_single_ended( self, sections=None, @@ -1936,151 +1931,25 @@ def calibration_single_ended( ) if method == "wls": - for input_item in [st_var, ast_var]: - assert input_item is not None, ( - "For wls define all " "variances (`st_var`, " "`ast_var`) " - ) - - calc_cov = True - - split = calibration_single_ended_solver( - self, + p_cov, p_val, p_var = self.calibration_single_ended_helper( st_var, ast_var, - calc_cov=calc_cov, - solver="external_split", - matching_indices=matching_indices, + fix_alpha, + fix_dalpha, + fix_gamma, + matching_indices, + nt, + nta, + nx, + solver, ) - y = split["y"] - w = split["w"] - - # Stack all X's - if fix_alpha: - assert not fix_dalpha, "Use either `fix_dalpha` or `fix_alpha`" - assert fix_alpha[0].size == self.x.size, ( - "fix_alpha also needs to be defined outside the reference " - "sections" - ) - assert fix_alpha[1].size == self.x.size, ( - "fix_alpha also needs to be defined outside the reference " - "sections" - ) - p_val = split["p0_est_alpha"].copy() - - if np.any(matching_indices): - raise NotImplementedError( - "Configuring fix_alpha and matching sections requires extra code" - ) - - X = sp.hstack( - (split["X_gamma"], split["X_alpha"], split["X_c"], split["X_TA"]) - ).tocsr() - ip_use = list(range(1 + nx + nt + nta * nt)) - - else: - X = sp.vstack( - ( - sp.hstack( - ( - split["X_gamma"], - split["X_dalpha"], - split["X_c"], - split["X_TA"], - ) - ), - split["X_m"], - ) - ).tocsr() - p_val = split["p0_est_dalpha"].copy() - ip_use = list(range(1 + 1 + nt + nta * nt)) - - p_var = np.zeros_like(p_val) - p_cov = np.zeros((p_val.size, p_val.size), dtype=float) - - if fix_gamma is not None: - ip_remove = [0] - ip_use = [i for i in ip_use if i not in ip_remove] - p_val[ip_remove] = fix_gamma[0] - p_var[ip_remove] = fix_gamma[1] - - X_gamma = ( - sp.vstack((split["X_gamma"], split["X_m"].tocsr()[:, 0].tocoo())) - .toarray() - .flatten() - ) - - y -= fix_gamma[0] * X_gamma - w = 1 / (1 / w + fix_gamma[1] * X_gamma) - - if fix_alpha is not None: - ip_remove = list(range(1, nx + 1)) - ip_use = [i for i in ip_use if i not in ip_remove] - p_val[ip_remove] = fix_alpha[0] - p_var[ip_remove] = fix_alpha[1] - - # X_alpha needs to be vertically extended to support matching sections - y -= split["X_alpha"].dot(fix_alpha[0]) - w = 1 / (1 
/ w + split["X_alpha"].dot(fix_alpha[1])) - - if fix_dalpha is not None: - ip_remove = [1] - ip_use = [i for i in ip_use if i not in ip_remove] - p_val[ip_remove] = fix_dalpha[0] - p_var[ip_remove] = fix_dalpha[1] - - y -= np.hstack( - ( - fix_dalpha[0] * split["X_dalpha"].toarray().flatten(), - ( - fix_dalpha[0] - * split["X_m"].tocsr()[:, 1].tocoo().toarray().flatten() - ), - ) - ) - w = 1 / ( - 1 / w - + np.hstack( - ( - fix_dalpha[1] * split["X_dalpha"].toarray().flatten(), - ( - fix_dalpha[1] - * split["X_m"].tocsr()[:, 1].tocoo().toarray().flatten() - ), - ) - ) - ) - - if solver == "sparse": - out = wls_sparse( - X[:, ip_use], - y, - w=w, - x0=p_val[ip_use], - calc_cov=calc_cov, - verbose=False, - ) - - elif solver == "stats": - out = wls_stats(X[:, ip_use], y, w=w, calc_cov=calc_cov, verbose=False) - - else: - assert 0, "Unknown solver" - - p_val[ip_use] = out[0] - p_var[ip_use] = out[1] - np.fill_diagonal(p_cov, p_var) # set variance of all fixed params - p_cov[np.ix_(ip_use, ip_use)] = out[2] - elif method == "external": for input_item in [p_val, p_var, p_cov]: assert ( input_item is not None ), "Define p_val, p_var, p_cov when using an external solver" - elif method == "external_split": - raise ValueError("Not implemented yet") - else: raise ValueError("Choose a valid method") @@ -2347,6 +2216,8 @@ def calibration_double_ended( estimate of the temperature is obtained from the weighted average of :math:`T_\mathrm{F}` and :math:`T_\mathrm{B}` as discussed in Section 7.2 [1]_ . + + Parameters ---------- p_val : array-like, optional @@ -2511,645 +2382,22 @@ def calibration_double_ended( matching_indices = match_sections(self, matching_sections) if method == "wls": - if fix_alpha or fix_gamma: - split = calibration_double_ended_solver( - self, - st_var, - ast_var, - rst_var, - rast_var, - calc_cov=True, - solver="external_split", - matching_indices=matching_indices, - verbose=verbose, - ) - else: - out = calibration_double_ended_solver( - self, - st_var, - ast_var, - rst_var, - rast_var, - calc_cov=True, - solver=solver, - matching_indices=matching_indices, - verbose=verbose, - ) - - p_val, p_var, p_cov = out - - # adjust split to fix parameters - """Wrapped in a function to reduce memory usage. - Constructing: - Z_gamma (nt * nx, 1). Data: positive 1/temp - Z_D (nt * nx, nt). Data: ones - E (nt * nx, nx). Data: ones - Zero_gamma (nt * nx, 1) - zero_d (nt * nx, nt) - Z_TA_fw (nt * nx, nta * 2 * nt) minus ones - Z_TA_bw (nt * nx, nta * 2 * nt) minus ones - Z_TA_E (nt * nx, nta * 2 * nt) - I_fw = 1/Tref*gamma - D_fw - E - TA_fw - I_bw = 1/Tref*gamma - D_bw + E - TA_bw - (I_bw - I_fw) / 2 = D_fw/2 - D_bw/2 + E + TA_fw/2 - TA_bw/2 Eq42 - """ - if fix_alpha and fix_gamma: - assert ( - np.size(fix_alpha[0]) == self.x.size - ), "define alpha for each location" - assert ( - np.size(fix_alpha[1]) == self.x.size - ), "define var alpha for each location" - m = ( - "The integrated differential attenuation is zero at the " - "first index of the reference sections." 
- ) - assert np.abs(fix_alpha[0][ix_sec[0]]) < 1e-8, m - # The array with the integrated differential att is termed E - - if np.any(matching_indices): - n_E_in_cal = split["ix_from_cal_match_to_glob"].size - p0_est = np.concatenate( - ( - split["p0_est"][1 : 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + n_E_in_cal :], - ) - ) - X_E1 = sp.csr_matrix( - ([], ([], [])), shape=(nt * nx_sec, self.x.size) - ) - X_E1[:, ix_sec[1:]] = split["E"] - X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] - X_E = sp.vstack( - ( - -X_E2, - X_E2, - split["E_match_F"], - split["E_match_B"], - split["E_match_no_cal"], - ) - ) - - X_gamma = ( - sp.vstack( - ( - split["Z_gamma"], - split["Z_gamma"], - split["Zero_eq12_gamma"], - split["Zero_eq12_gamma"], - split["Zero_eq3_gamma"], - ) - ) - .toarray() - .flatten() - ) - - X = sp.vstack( - ( - sp.hstack( - (-split["Z_D"], split["Zero_d"], split["Z_TA_fw"]) - ), - sp.hstack( - (split["Zero_d"], -split["Z_D"], split["Z_TA_bw"]) - ), - sp.hstack((split["Zero_d_eq12"], split["Z_TA_eq1"])), - sp.hstack((split["Zero_d_eq12"], split["Z_TA_eq2"])), - sp.hstack((split["d_no_cal"], split["Z_TA_eq3"])), - ) - ) - - y = np.concatenate( - ( - split["y_F"], - split["y_B"], - split["y_eq1"], - split["y_eq2"], - split["y_eq3"], - ) - ) - y -= X_E.dot(fix_alpha[0][split["ix_from_cal_match_to_glob"]]) - y -= fix_gamma[0] * X_gamma - - # variances are added. weight is the inverse of the variance - # of the observations - w_ = np.concatenate( - ( - split["w_F"], - split["w_B"], - split["w_eq1"], - split["w_eq2"], - split["w_eq3"], - ) - ) - w = 1 / ( - 1 / w_ - + X_E.dot(fix_alpha[1][split["ix_from_cal_match_to_glob"]]) - + fix_gamma[1] * X_gamma - ) - - else: - # X_gamma - X_E = sp.vstack((-split["E"], split["E"])) - X_gamma = ( - sp.vstack((split["Z_gamma"], split["Z_gamma"])) - .toarray() - .flatten() - ) - # Use only the remaining coefficients - # Stack all X's - X = sp.vstack( - ( - sp.hstack( - (-split["Z_D"], split["Zero_d"], split["Z_TA_fw"]) - ), - sp.hstack( - (split["Zero_d"], -split["Z_D"], split["Z_TA_bw"]) - ), - ) - ) - - # Move the coefficients times the fixed gamma to the - # observations - y = np.concatenate((split["y_F"], split["y_B"])) - y -= X_E.dot(fix_alpha[0][ix_sec[1:]]) - y -= fix_gamma[0] * X_gamma - # variances are added. weight is the inverse of the variance - # of the observations - w_ = np.concatenate((split["w_F"], split["w_B"])) - w = 1 / ( - 1 / w_ - + X_E.dot(fix_alpha[1][ix_sec[1:]]) - + fix_gamma[1] * X_gamma - ) - - # [C_1, C_2, .., C_nt, TA_fw_a_1, TA_fw_a_2, TA_fw_a_nt, - # TA_bw_a_1, TA_bw_a_2, TA_bw_a_nt] Then continues with - # TA for connector b. 
- p0_est = np.concatenate( - ( - split["p0_est"][1 : 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + nx_sec - 1 :], - ) - ) - - if solver == "sparse": - out = wls_sparse(X, y, w=w, x0=p0_est, calc_cov=True, verbose=False) - - elif solver == "stats": - out = wls_stats(X, y, w=w, calc_cov=True, verbose=False) - - # Added fixed gamma and its variance to the solution - p_val = np.concatenate( - ([fix_gamma[0]], out[0][: 2 * nt], fix_alpha[0], out[0][2 * nt :]) - ) - p_var = np.concatenate( - ([fix_gamma[1]], out[1][: 2 * nt], fix_alpha[1], out[1][2 * nt :]) - ) - - # whether it returns a copy or a view depends on what - # version of numpy you are using - p_cov = np.diag(p_var).copy() - from_i = np.concatenate( - ( - np.arange(1, 2 * nt + 1), - np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), - ) - ) - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") - p_cov[iox_sec1, iox_sec2] = out[2] - - elif fix_gamma: - if np.any(matching_indices): - # n_E_in_cal = split['ix_from_cal_match_to_glob'].size - p0_est = split["p0_est"][1:] - X_E1 = sp.csr_matrix( - ([], ([], [])), shape=(nt * nx_sec, self.x.size) - ) - from_i = ix_sec[1:] - X_E1[:, from_i] = split["E"] - X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] - X = sp.vstack( - ( - sp.hstack( - ( - -split["Z_D"], - split["Zero_d"], - -X_E2, - split["Z_TA_fw"], - ) - ), - sp.hstack( - (split["Zero_d"], -split["Z_D"], X_E2, split["Z_TA_bw"]) - ), - sp.hstack( - ( - split["Zero_d_eq12"], - split["E_match_F"], - split["Z_TA_eq1"], - ) - ), - sp.hstack( - ( - split["Zero_d_eq12"], - split["E_match_B"], - split["Z_TA_eq2"], - ) - ), - sp.hstack( - ( - split["d_no_cal"], - split["E_match_no_cal"], - split["Z_TA_eq3"], - ) - ), - ) - ) - X_gamma = ( - sp.vstack( - ( - split["Z_gamma"], - split["Z_gamma"], - split["Zero_eq12_gamma"], - split["Zero_eq12_gamma"], - split["Zero_eq3_gamma"], - ) - ) - .toarray() - .flatten() - ) - - y = np.concatenate( - ( - split["y_F"], - split["y_B"], - split["y_eq1"], - split["y_eq2"], - split["y_eq3"], - ) - ) - y -= fix_gamma[0] * X_gamma - - # variances are added. weight is the inverse of the variance - # of the observations - w_ = np.concatenate( - ( - split["w_F"], - split["w_B"], - split["w_eq1"], - split["w_eq2"], - split["w_eq3"], - ) - ) - w = 1 / (1 / w_ + fix_gamma[1] * X_gamma) - - else: - X_gamma = ( - sp.vstack((split["Z_gamma"], split["Z_gamma"])) - .toarray() - .flatten() - ) - # Use only the remaining coefficients - X = sp.vstack( - ( - sp.hstack( - ( - -split["Z_D"], - split["Zero_d"], - -split["E"], - split["Z_TA_fw"], - ) - ), - sp.hstack( - ( - split["Zero_d"], - -split["Z_D"], - split["E"], - split["Z_TA_bw"], - ) - ), - ) - ) - # Move the coefficients times the fixed gamma to the - # observations - y = np.concatenate((split["y_F"], split["y_B"])) - y -= fix_gamma[0] * X_gamma - # variances are added. 
weight is the inverse of the variance - # of the observations - w_ = np.concatenate((split["w_F"], split["w_B"])) - w = 1 / (1 / w_ + fix_gamma[1] * X_gamma) - - p0_est = split["p0_est"][1:] - - if solver == "sparse": - out = wls_sparse(X, y, w=w, x0=p0_est, calc_cov=True, verbose=False) - - elif solver == "stats": - out = wls_stats(X, y, w=w, calc_cov=True, verbose=False) - - # put E outside of reference section in solution - # concatenating makes a copy of the data instead of using a - # pointer - ds_sub = self[["st", "ast", "rst", "rast", "trans_att"]] - ds_sub["df"] = (("time",), out[0][:nt]) - ds_sub["df_var"] = (("time",), out[1][:nt]) - ds_sub["db"] = (("time",), out[0][nt : 2 * nt]) - ds_sub["db_var"] = (("time",), out[1][nt : 2 * nt]) - - if nta > 0: - if np.any(matching_indices): - n_E_in_cal = split["ix_from_cal_match_to_glob"].size - ta = out[0][2 * nt + n_E_in_cal :].reshape( - (nt, 2, nta), order="F" - ) - ta_var = out[1][2 * nt + n_E_in_cal :].reshape( - (nt, 2, nta), order="F" - ) - - else: - ta = out[0][2 * nt + nx_sec - 1 :].reshape( - (nt, 2, nta), order="F" - ) - ta_var = out[1][2 * nt + nx_sec - 1 :].reshape( - (nt, 2, nta), order="F" - ) - - talpha_fw = ta[:, 0, :] - talpha_bw = ta[:, 1, :] - talpha_fw_var = ta_var[:, 0, :] - talpha_bw_var = ta_var[:, 1, :] - else: - talpha_fw = None - talpha_bw = None - talpha_fw_var = None - talpha_bw_var = None - - E_all_exact, E_all_var_exact = calc_alpha_double( - mode="exact", - ds=ds_sub, - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - ix_alpha_is_zero=ix_sec[0], - talpha_fw=talpha_fw, - talpha_bw=talpha_bw, - talpha_fw_var=talpha_fw_var, - talpha_bw_var=talpha_bw_var, - ) - - if not np.any(matching_indices): - # Added fixed gamma and its variance to the solution. And - # expand to include locations outside reference sections. - p_val = np.concatenate( - ( - [fix_gamma[0]], - out[0][: 2 * nt], - E_all_exact, - out[0][2 * nt + nx_sec - 1 :], - ) - ) - p_val[1 + 2 * nt + ix_sec[1:]] = out[0][ - 2 * nt : 2 * nt + nx_sec - 1 - ] - p_val[1 + 2 * nt + ix_sec[0]] = 0.0 - p_var = np.concatenate( - ( - [fix_gamma[1]], - out[1][: 2 * nt], - E_all_var_exact, - out[1][2 * nt + nx_sec - 1 :], - ) - ) - p_var[1 + 2 * nt + ix_sec[1:]] = out[1][ - 2 * nt : 2 * nt + nx_sec - 1 - ] - else: - n_E_in_cal = split["ix_from_cal_match_to_glob"].size - - # Added fixed gamma and its variance to the solution. And - # expand to include locations outside reference sections. 
- p_val = np.concatenate( - ( - [fix_gamma[0]], - out[0][: 2 * nt], - E_all_exact, - out[0][2 * nt + n_E_in_cal :], - ) - ) - p_val[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = out[0][ - 2 * nt : 2 * nt + n_E_in_cal - ] - p_val[1 + 2 * nt + ix_sec[0]] = 0.0 - p_var = np.concatenate( - ( - [fix_gamma[1]], - out[1][: 2 * nt], - E_all_var_exact, - out[1][2 * nt + n_E_in_cal :], - ) - ) - p_var[1 + 2 * nt + split["ix_from_cal_match_to_glob"]] = out[1][ - 2 * nt : 2 * nt + n_E_in_cal - ] - - p_cov = np.diag(p_var).copy() - - if not np.any(matching_indices): - from_i = np.concatenate( - ( - np.arange(1, 2 * nt + 1), - 2 * nt + 1 + ix_sec[1:], - np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), - ) - ) - else: - from_i = np.concatenate( - ( - np.arange(1, 2 * nt + 1), - 2 * nt + 1 + split["ix_from_cal_match_to_glob"], - np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), - ) - ) - - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") - p_cov[iox_sec1, iox_sec2] = out[2] - - elif fix_alpha: - assert ( - np.size(fix_alpha[0]) == self.x.size - ), "define alpha for each location" - assert ( - np.size(fix_alpha[1]) == self.x.size - ), "define var alpha for each location" - m = ( - "The integrated differential attenuation is zero at the " - "first index of the reference sections." - ) - assert np.abs(fix_alpha[0][ix_sec[0]]) < 1e-6, m - # The array with the integrated differential att is termed E - - if not np.any(matching_indices): - # X_gamma - X_E = sp.vstack((-split["E"], split["E"])) - # Use only the remaining coefficients - # Stack all X's - X = sp.vstack( - ( - sp.hstack( - ( - split["Z_gamma"], - -split["Z_D"], - split["Zero_d"], - split["Z_TA_fw"], - ) - ), - sp.hstack( - ( - split["Z_gamma"], - split["Zero_d"], - -split["Z_D"], - split["Z_TA_bw"], - ) - ), - ) - ) - # Move the coefficients times the fixed gamma to the - # observations - y = np.concatenate((split["y_F"], split["y_B"])) - y -= X_E.dot(fix_alpha[0][ix_sec[1:]]) - - # variances are added. weight is the inverse of the variance - # of the observations - w_ = np.concatenate((split["w_F"], split["w_B"])) - w = 1 / (1 / w_ + X_E.dot(fix_alpha[1][ix_sec[1:]])) - - p0_est = np.concatenate( - ( - split["p0_est"][: 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + nx_sec - 1 :], - ) - ) - - else: - n_E_in_cal = split["ix_from_cal_match_to_glob"].size - p0_est = np.concatenate( - ( - split["p0_est"][: 1 + 2 * nt], - split["p0_est"][1 + 2 * nt + n_E_in_cal :], - ) - ) - X_E1 = sp.csr_matrix( - ([], ([], [])), shape=(nt * nx_sec, self.x.size) - ) - X_E1[:, ix_sec[1:]] = split["E"] - X_E2 = X_E1[:, split["ix_from_cal_match_to_glob"]] - X_E = sp.vstack( - ( - -X_E2, - X_E2, - split["E_match_F"], - split["E_match_B"], - split["E_match_no_cal"], - ) - ) - - X = sp.vstack( - ( - sp.hstack( - ( - split["Z_gamma"], - -split["Z_D"], - split["Zero_d"], - split["Z_TA_fw"], - ) - ), - sp.hstack( - ( - split["Z_gamma"], - split["Zero_d"], - -split["Z_D"], - split["Z_TA_bw"], - ) - ), - sp.hstack( - ( - split["Zero_eq12_gamma"], - split["Zero_d_eq12"], - split["Z_TA_eq1"], - ) - ), - sp.hstack( - ( - split["Zero_eq12_gamma"], - split["Zero_d_eq12"], - split["Z_TA_eq2"], - ) - ), - sp.hstack( - ( - split["Zero_eq3_gamma"], - split["d_no_cal"], - split["Z_TA_eq3"], - ) - ), - ) - ) - - y = np.concatenate( - ( - split["y_F"], - split["y_B"], - split["y_eq1"], - split["y_eq2"], - split["y_eq3"], - ) - ) - y -= X_E.dot(fix_alpha[0][split["ix_from_cal_match_to_glob"]]) - - # variances are added. 
weight is the inverse of the variance - # of the observations - w_ = np.concatenate( - ( - split["w_F"], - split["w_B"], - split["w_eq1"], - split["w_eq2"], - split["w_eq3"], - ) - ) - w = 1 / ( - 1 / w_ - + X_E.dot(fix_alpha[1][split["ix_from_cal_match_to_glob"]]) - ) - - if solver == "sparse": - out = wls_sparse(X, y, w=w, x0=p0_est, calc_cov=True, verbose=False) - - elif solver == "stats": - out = wls_stats(X, y, w=w, calc_cov=True, verbose=False) - - # Added fixed gamma and its variance to the solution - p_val = np.concatenate( - (out[0][: 1 + 2 * nt], fix_alpha[0], out[0][1 + 2 * nt :]) - ) - p_var = np.concatenate( - (out[1][: 1 + 2 * nt], fix_alpha[1], out[1][1 + 2 * nt :]) - ) - - p_cov = np.diag(p_var).copy() - - from_i = np.concatenate( - ( - np.arange(1 + 2 * nt), - np.arange(1 + 2 * nt + nx, 1 + 2 * nt + nx + nta * nt * 2), - ) - ) - - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") - p_cov[iox_sec1, iox_sec2] = out[2] - - else: - pass + p_cov, p_val, p_var = self.calibration_double_ended_helper( + st_var, + ast_var, + rst_var, + rast_var, + fix_alpha, + fix_gamma, + nt, + nta, + nx, + nx_sec, + ix_sec, + matching_indices, + solver, + verbose, + ) elif method == "external": for input_item in [p_val, p_var, p_cov]: From 1eded973ec5acf4be950bb51d2f3f3883321c623 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 6 Aug 2023 17:45:08 +0200 Subject: [PATCH 20/26] Updated changelog and corrected small error due to refactoring --- CHANGELOG.rst | 2 ++ src/dtscalibration/datastore.py | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 569022b7..d8bf41cb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -20,6 +20,8 @@ Changed * Requiring netcdf4 >= 1.6.4 * Optional dependencies of xarray that improve performance are now required by default. * variance_stokes_linear() does not default through zero anymore. +* Refactored calibration_single_ended and calibration_double_ended() +* Moved ParameterIndices classes to calibration_utils.py * Flox included in requirements to speed up resampling via xarray ([Xarray #5734](https://github.com/pydata/xarray/pull/5734)). 
Removed diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 3323c414..1c32e781 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -13,6 +13,8 @@ from scipy.optimize import minimize from scipy.sparse import linalg as ln +from dtscalibration.calibrate_utils import calibration_single_ended_helper +from dtscalibration.calibrate_utils import calibration_double_ended_helper from dtscalibration.calibrate_utils import match_sections from dtscalibration.calibrate_utils import parse_st_var from dtscalibration.datastore_utils import ParameterIndexDoubleEnded @@ -1931,7 +1933,8 @@ def calibration_single_ended( ) if method == "wls": - p_cov, p_val, p_var = self.calibration_single_ended_helper( + p_cov, p_val, p_var = calibration_single_ended_helper( + self, st_var, ast_var, fix_alpha, @@ -2382,7 +2385,8 @@ def calibration_double_ended( matching_indices = match_sections(self, matching_sections) if method == "wls": - p_cov, p_val, p_var = self.calibration_double_ended_helper( + p_cov, p_val, p_var = calibration_double_ended_helper( + self st_var, ast_var, rst_var, From 270c74d24637da828958affc320889bfa3c19b08 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 6 Aug 2023 17:47:00 +0200 Subject: [PATCH 21/26] Forgotten comma --- src/dtscalibration/datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 1c32e781..806a7a01 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -2386,7 +2386,7 @@ def calibration_double_ended( if method == "wls": p_cov, p_val, p_var = calibration_double_ended_helper( - self + self, st_var, ast_var, rst_var, From f8872ec2346afcb7f3d73bce21d93eb9484340c4 Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 6 Aug 2023 19:53:45 +0200 Subject: [PATCH 22/26] Sorted imports --- src/dtscalibration/datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 806a7a01..6738d5bc 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -13,8 +13,8 @@ from scipy.optimize import minimize from scipy.sparse import linalg as ln -from dtscalibration.calibrate_utils import calibration_single_ended_helper from dtscalibration.calibrate_utils import calibration_double_ended_helper +from dtscalibration.calibrate_utils import calibration_single_ended_helper from dtscalibration.calibrate_utils import match_sections from dtscalibration.calibrate_utils import parse_st_var from dtscalibration.datastore_utils import ParameterIndexDoubleEnded From cff98093783059fa5932412c677205ecb63c521f Mon Sep 17 00:00:00 2001 From: Bas des Tombe Date: Sun, 6 Aug 2023 21:03:06 +0200 Subject: [PATCH 23/26] Reordered some functions in datastore_utils.py --- src/dtscalibration/calibrate_utils.py | 2 +- src/dtscalibration/datastore.py | 2441 ++++++++++++------------- src/dtscalibration/datastore_utils.py | 1823 +++++++++--------- 3 files changed, 2135 insertions(+), 2131 deletions(-) diff --git a/src/dtscalibration/calibrate_utils.py b/src/dtscalibration/calibrate_utils.py index 466d3d2a..6e90e208 100644 --- a/src/dtscalibration/calibrate_utils.py +++ b/src/dtscalibration/calibrate_utils.py @@ -20,7 +20,7 @@ def parse_st_var(ds, st_var, st_label="st"): If `callable` the variance of the noise from the Stokes detector is a function of the intensity, as defined in the callable function. 
Or when the variance is a function of the intensity (Poisson - distributed) define a DataArray of the shape shape as ds.st, where the + distributed) define a DataArray of the shape as ds.st, where the variance can be a function of time and/or x. st_label : string Name of the (reverse) stokes/anti-stokes data variable which is being diff --git a/src/dtscalibration/datastore.py b/src/dtscalibration/datastore.py index 6738d5bc..814c21b0 100644 --- a/src/dtscalibration/datastore.py +++ b/src/dtscalibration/datastore.py @@ -19,6 +19,7 @@ from dtscalibration.calibrate_utils import parse_st_var from dtscalibration.datastore_utils import ParameterIndexDoubleEnded from dtscalibration.datastore_utils import ParameterIndexSingleEnded +from dtscalibration.datastore_utils import check_deprecated_kwargs from dtscalibration.datastore_utils import check_timestep_allclose from dtscalibration.datastore_utils import ufunc_per_section_helper from dtscalibration.io_utils import _dim_attrs @@ -767,73 +768,6 @@ def get_section_indices(self, sec): xis = self.x.astype(int) * 0 + np.arange(self.x.size, dtype=int) return xis.sel(x=sec).values - def check_deprecated_kwargs(self, kwargs): - """ - Internal function that parses the `kwargs` for depreciated keyword - arguments. - - Depreciated keywords raise an error, pending to be depreciated do not. - But this requires that the code currently deals with those arguments. - - Parameters - ---------- - kwargs : Dict - A dictionary with keyword arguments. - - Returns - ------- - - """ - msg = """Previously, it was possible to manually set the label from - which the Stokes and anti-Stokes were read within the DataStore - object. To reduce the clutter in the code base and be able to - maintain it, this option was removed. - See: https://github.com/dtscalibration/python-dts-calibration/issues/81 - - The new **fixed** names are: st, ast, rst, rast. - - It is still possible to use the previous defaults, for example when - reading stored measurements from netCDF, by renaming the labels. The - old default labels were ST, AST, REV-ST, REV-AST. - - ``` - ds = open_datastore(path_to_old_file) - ds = ds.rename_labels() - ds.calibration_double_ended( - st_var=1.5, - ast_var=1.5, - rst_var=1., - rast_var=1., - method='wls') - ``` - - ds.tmpw.plot() - """ - list_of_depr = [ - "st_label", - "ast_label", - "rst_label", - "rast_label", - "transient_asym_att_x", - "transient_att_x", - ] - list_of_pending_depr = [] - - kwargs = {k: v for k, v in kwargs.items() if k not in list_of_pending_depr} - - for k in kwargs: - if k in list_of_depr: - raise NotImplementedError(msg) - - if len(kwargs) != 0: - raise NotImplementedError( - "The following keywords are not " - + "supported: " - + ", ".join(kwargs.keys()) - ) - - pass - def rename_labels(self, assertion=True): """ Renames the `ST` DataArrays (old convention) to `st` (new convention). @@ -1667,52 +1601,6 @@ def inverse_variance_weighted_mean_array( pass - def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): - """ - Returns an array with bools wether the temperature of the reference - sections are within the confidence intervals - - Parameters - ---------- - sections : Dict[str, List[slice]] - ci_label : str - The label of the data containing the confidence intervals. - conf_ints : Tuple - A tuple containing two floats between 0 and 1, representing the - levels between which the reference temperature should lay. 
- - Returns - ------- - - """ - if sections is None: - sections = self.sections - else: - sections = self.validate_sections(sections) - - if conf_ints is None: - conf_ints = self[ci_label].values - - assert len(conf_ints) == 2, "Please define conf_ints" - - tmp_dn = self[ci_label].sel(CI=conf_ints[0], method="nearest") - tmp_up = self[ci_label].sel(CI=conf_ints[1], method="nearest") - - ref = self.ufunc_per_section( - sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" - ) - ix_resid = self.ufunc_per_section( - sections=sections, x_indices=True, calc_per="all" - ) - ref_sorted = np.full(shape=tmp_dn.shape, fill_value=np.nan) - ref_sorted[ix_resid, :] = ref - ref_da = xr.DataArray(data=ref_sorted, coords=tmp_dn.coords) - - mask_dn = ref_da >= tmp_dn - mask_up = ref_da <= tmp_up - - return np.logical_and(mask_dn, mask_up) - def set_trans_att(self, trans_att=None): """Gracefully set the locations that introduce directional differential attenuation @@ -1899,7 +1787,7 @@ def calibration_single_ended( 07Calibrate_single_wls.ipynb>`_ """ - self.check_deprecated_kwargs(kwargs) + check_deprecated_kwargs(kwargs) self.set_trans_att(trans_att=trans_att, **kwargs) if sections is not None: @@ -2329,7 +2217,7 @@ def calibration_double_ended( 08Calibrate_double_wls.ipynb>`_ """ # TODO: confidence intervals using the variance approximated by linear error propagation - self.check_deprecated_kwargs(kwargs) + check_deprecated_kwargs(kwargs) self.set_trans_att(trans_att=trans_att, **kwargs) @@ -2713,7 +2601,7 @@ def calibration_double_ended( pass - def conf_int_single_ended( + def average_single_ended( self, p_val="p_val", p_cov="p_cov", @@ -2721,29 +2609,24 @@ def conf_int_single_ended( ast_var=None, conf_ints=None, mc_sample_size=100, + ci_avg_time_flag1=False, + ci_avg_time_flag2=False, + ci_avg_time_sel=None, + ci_avg_time_isel=None, + ci_avg_x_flag1=False, + ci_avg_x_flag2=False, + ci_avg_x_sel=None, + ci_avg_x_isel=None, + var_only_sections=None, da_random_state=None, mc_remove_set_flag=True, reduce_memory_usage=False, **kwargs, ): - r""" - Estimation of the confidence intervals for the temperatures measured - with a single-ended setup. It consists of five steps. First, the variances - of the Stokes and anti-Stokes intensity measurements are estimated - following the steps in Section 4 [1]_. A Normal - distribution is assigned to each intensity measurement that is centered - at the measurement and using the estimated variance. Second, a multi- - variate Normal distribution is assigned to the estimated parameters - using the covariance matrix from the calibration procedure presented in - Section 5 [1]_. Third, the distributions are sampled, and the - temperature is computed with Equation 12 [1]_. Fourth, step - three is repeated, e.g., 10,000 times for each location and for each - time. The resulting 10,000 realizations of the temperatures - approximate the probability density functions of the estimated - temperature at that location and time. Fifth, the standard uncertainties - are computed with the standard deviations of the realizations of the - temperatures, and the 95\% confidence intervals are computed from the - 2.5\% and 97.5\% percentiles of the realizations of the temperatures. + """ + Average temperatures from single-ended setups. + + Four types of averaging are implemented. Please see Example Notebook 16. 
Parameters @@ -2774,6 +2657,80 @@ def conf_int_single_ended( mc_sample_size : int Size of the monte carlo parameter set used to calculate the confidence interval + ci_avg_time_flag1 : bool + The confidence intervals differ each time step. Assumes the + temperature varies during the measurement period. Computes the + arithmic temporal mean. If you would like to know the confidence + interfal of: + (1) a single additional measurement. So you can state "if another + measurement were to be taken, it would have this ci" + (2) all measurements. So you can state "The temperature remained + during the entire measurement period between these ci bounds". + Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg1` are added to the DataStore. Works independently of the + ci_avg_time_flag2 and ci_avg_x_flag. + ci_avg_time_flag2 : bool + The confidence intervals differ each time step. Assumes the + temperature remains constant during the measurement period. + Computes the inverse-variance-weighted-temporal-mean temperature + and its uncertainty. + If you would like to know the confidence interfal of: + (1) I want to estimate a background temperature with confidence + intervals. I hereby assume the temperature does not change over + time and average all measurements to get a better estimate of the + background temperature. + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg2` are added to the DataStore. Works independently of the + ci_avg_time_flag1 and ci_avg_x_flag. + ci_avg_time_sel : slice + Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a + selection of the data + ci_avg_time_isel : iterable of int + Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a + selection of the data + ci_avg_x_flag1 : bool + The confidence intervals differ at each location. Assumes the + temperature varies over `x` and over time. Computes the + arithmic spatial mean. If you would like to know the confidence + interfal of: + (1) a single additional measurement location. So you can state "if + another measurement location were to be taken, + it would have this ci" + (2) all measurement locations. So you can state "The temperature + along the fiber remained between these ci bounds". + Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avgx1` are added to the DataStore. Works independently of the + ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x2_flag. + ci_avg_x_flag2 : bool + The confidence intervals differ at each location. Assumes the + temperature is the same at each location but varies over time. + Computes the inverse-variance-weighted-spatial-mean temperature + and its uncertainty. + If you would like to know the confidence interfal of: + (1) I have put a lot of fiber in water, and I know that the + temperature variation in the water is much smaller than along + other parts of the fiber. And I would like to average the + measurements from multiple locations to improve the estimated + temperature. + Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the + DataStore. If `conf_ints` are set, also the confidence intervals + `_mc_avg2` are added to the DataStore. Works independently of the + ci_avg_time_flag1 and ci_avg_x_flag. 
+ ci_avg_x_sel : slice + Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a + selection of the data + ci_avg_x_isel : iterable of int + Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a + selection of the data + var_only_sections : bool + useful if using the ci_avg_x_flag. Only calculates the var over the + sections, so that the values can be compared with accuracy along the + reference sections. Where the accuracy is the variance of the + residuals between the estimated temperature and temperature of the + water baths. da_random_state For testing purposes. Similar to random seed. The seed for dask. Makes random not so random. To produce reproducable results for @@ -2784,207 +2741,227 @@ def conf_int_single_ended( reduce_memory_usage : bool Use less memory but at the expense of longer computation time + Returns + ------- - References - ---------- - .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation - of Temperature and Associated Uncertainty from Fiber-Optic Raman- - Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. - https://doi.org/10.3390/s20082235 """ - self.check_deprecated_kwargs(kwargs) - - if da_random_state: - state = da_random_state - else: - state = da.random.RandomState() - - no, nt = self.st.data.shape - if "trans_att" in self.keys(): - nta = self.trans_att.size - else: - nta = 0 + check_deprecated_kwargs(kwargs) - assert isinstance(p_val, (str, np.ndarray, np.generic)) - if isinstance(p_val, str): - p_val = self[p_val].data - - npar = p_val.size - - # number of parameters - if npar == nt + 2 + nt * nta: - fixed_alpha = False - elif npar == 1 + no + nt + nt * nta: - fixed_alpha = True - else: - raise Exception("The size of `p_val` is not what I expected") - - self.coords["mc"] = range(mc_sample_size) - - if conf_ints: - self.coords["CI"] = conf_ints - - # WLS - if isinstance(p_cov, str): - p_cov = self[p_cov].data - assert p_cov.shape == (npar, npar) - - p_mc = sst.multivariate_normal.rvs(mean=p_val, cov=p_cov, size=mc_sample_size) + if var_only_sections is not None: + raise NotImplementedError() - if fixed_alpha: - self["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) - self["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) - else: - self["dalpha_mc"] = (("mc",), p_mc[:, 1]) - self["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) + self.conf_int_single_ended( + p_val=p_val, + p_cov=p_cov, + st_var=st_var, + ast_var=ast_var, + conf_ints=None, + mc_sample_size=mc_sample_size, + da_random_state=da_random_state, + mc_remove_set_flag=False, + reduce_memory_usage=reduce_memory_usage, + **kwargs, + ) - self["gamma_mc"] = (("mc",), p_mc[:, 0]) - if nta: - self["ta_mc"] = ( - ("mc", "trans_att", "time"), - np.reshape(p_mc[:, -nt * nta :], (mc_sample_size, nta, nt)), + if ci_avg_time_sel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + self.coords[time_dim2] = ( + (time_dim2,), + self["time"].sel(**{"time": ci_avg_time_sel}).data, + ) + self["tmpf_avgsec"] = ( + ("x", time_dim2), + self["tmpf"].sel(**{"time": ci_avg_time_sel}).data, + ) + self["tmpf_mc_set"] = ( + ("mc", "x", time_dim2), + self["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, ) - rsize = (self.mc.size, self.x.size, self.time.size) + elif ci_avg_time_isel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + self.coords[time_dim2] = ( + (time_dim2,), + self["time"].isel(**{"time": ci_avg_time_isel}).data, + ) + self["tmpf_avgsec"] = ( + ("x", time_dim2), + self["tmpf"].isel(**{"time": ci_avg_time_isel}).data, + ) + 
self["tmpf_mc_set"] = ( + ("mc", "x", time_dim2), + self["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + ) - if reduce_memory_usage: - memchunk = da.ones( - (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} - ).chunks + elif ci_avg_x_sel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) + self["tmpf_avgsec"] = ( + (x_dim2, "time"), + self["tmpf"].sel(x=ci_avg_x_sel).data, + ) + self["tmpf_mc_set"] = ( + ("mc", x_dim2, "time"), + self["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, + ) + + elif ci_avg_x_isel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) + self["tmpf_avgsec"] = ( + (x_dim2, time_dim2), + self["tmpf"].isel(x=ci_avg_x_isel).data, + ) + self["tmpf_mc_set"] = ( + ("mc", x_dim2, time_dim2), + self["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, + ) else: - memchunk = da.ones( - (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} - ).chunks + self["tmpf_avgsec"] = self["tmpf"] + x_dim2 = "x" + time_dim2 = "time" - # Draw from the normal distributions for the Stokes intensities - for k, st_labeli, st_vari in zip( - ["r_st", "r_ast"], ["st", "ast"], [st_var, ast_var] - ): - # Load the mean as chunked Dask array, otherwise eats memory - if type(self[st_labeli].data) == da.core.Array: - loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) - else: - loc = da.from_array(self[st_labeli].data, chunks=memchunk[1:]) + # subtract the mean temperature + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + self["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) - # Make sure variance is of size (no, nt) - if np.size(st_vari) > 1: - if st_vari.shape == self[st_labeli].shape: - pass - else: - st_vari = np.broadcast_to(st_vari, (no, nt)) - else: - pass + if ci_avg_x_flag1: + # unweighted mean + self["tmpf_avgx1"] = self["tmpf" + "_avgsec"].mean(dim=x_dim2) - # Load variance as chunked Dask array, otherwise eats memory - if type(st_vari) == da.core.Array: - st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + qvar = q.var(dim=["mc", x_dim2], ddof=1) + self["tmpf_mc_avgx1_var"] = qvar - elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: - st_vari_da = da.asarray( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] - ) + if conf_ints: + new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) + avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) + q = self["tmpf_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis - elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: - st_vari_da = da.from_array( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] - ) + self["tmpf_mc_avgx1"] = (("CI", time_dim2), q) - else: - st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) + if ci_avg_x_flag2: + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] - self[k] = ( - ("mc", "x", "time"), - state.normal( - loc=loc, # has chunks=memchunk[1:] - scale=st_vari_da**0.5, - size=rsize, - chunks=memchunk, - ), - ) + qvar = q.var(dim=["mc"], ddof=1) - ta_arr = np.zeros((mc_sample_size, no, nt)) + # Inverse-variance weighting + avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - if nta: - for ii, ta in enumerate(self["ta_mc"]): - for tai, taxi in zip(ta.values, self.trans_att.values): 
- ta_arr[ii, self.x.values >= taxi] = ( - ta_arr[ii, self.x.values >= taxi] + tai - ) - self["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) + self["tmpf_mc_avgx2_var"] = avg_x_var - if fixed_alpha: - self["tmpf_mc_set"] = ( - self["gamma_mc"] - / ( - ( - np.log(self["r_st"]) - - np.log(self["r_ast"]) - + (self["c_mc"] + self["ta_mc_arr"]) - ) - + self["alpha_mc"] - ) - - 273.15 - ) - else: - self["tmpf_mc_set"] = ( - self["gamma_mc"] - / ( - ( - np.log(self["r_st"]) - - np.log(self["r_ast"]) - + (self["c_mc"] + self["ta_mc_arr"]) - ) - + (self["dalpha_mc"] * self.x) - ) - - 273.15 - ) + self["tmpf" + "_mc_avgx2_set"] = (self["tmpf_mc_set"] / qvar).sum( + dim=x_dim2 + ) * avg_x_var + self["tmpf" + "_avgx2"] = self["tmpf" + "_mc_avgx2_set"].mean(dim="mc") - avg_dims = ["mc"] + if conf_ints: + new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) + avg_axis_avgx = self["tmpf_mc_set"].get_axis_num("mc") - avg_axis = self["tmpf_mc_set"].get_axis_num(avg_dims) + qq = self["tmpf_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), + chunks=new_chunks, # + drop_axis=avg_axis_avgx, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + self["tmpf_mc_avgx2"] = (("CI", time_dim2), qq) - self["tmpf_mc_var"] = (self["tmpf_mc_set"] - self["tmpf"]).var( - dim=avg_dims, ddof=1 - ) + if ci_avg_time_flag1 is not None: + # unweighted mean + self["tmpf_avg1"] = self["tmpf" + "_avgsec"].mean(dim=time_dim2) - if conf_ints: - new_chunks = ((len(conf_ints),),) + self["tmpf_mc_set"].chunks[1:] + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + qvar = q.var(dim=["mc", time_dim2], ddof=1) + self["tmpf_mc_avg1_var"] = qvar - qq = self["tmpf_mc_set"] + if conf_ints: + new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1]) + avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", time_dim2]) + q = self["tmpf_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis - q = qq.data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, # avg dimesnions are dropped from input arr - new_axis=0, - ) # The new CI dimension is added as first axis + self["tmpf_mc_avg1"] = (("CI", x_dim2), q) - self["tmpf_mc"] = (("CI", "x", "time"), q) + if ci_avg_time_flag2: + q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + + qvar = q.var(dim=["mc"], ddof=1) + + # Inverse-variance weighting + avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) + + self["tmpf_mc_avg2_var"] = avg_time_var + self["tmpf" + "_mc_avg2_set"] = (self["tmpf_mc_set"] / qvar).sum( + dim=time_dim2 + ) * avg_time_var + self["tmpf_avg2"] = self["tmpf" + "_mc_avg2_set"].mean(dim="mc") + + if conf_ints: + new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1]) + avg_axis_avg2 = self["tmpf_mc_set"].get_axis_num("mc") + + qq = self["tmpf_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), + chunks=new_chunks, # + drop_axis=avg_axis_avg2, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + self["tmpf_mc_avg2"] = (("CI", x_dim2), qq) + # Clean up the garbage. All arrays with a Monte Carlo dimension. 
if mc_remove_set_flag: - drop_var = [ + remove_mc_set = [ + "r_st", + "r_ast", "gamma_mc", "dalpha_mc", - "alpha_mc", "c_mc", + "x_avg", + "time_avg", "mc", - "r_st", - "r_ast", - "tmpf_mc_set", "ta_mc_arr", ] - for k in drop_var: + remove_mc_set.append("tmpf_avgsec") + remove_mc_set.append("tmpf_mc_set") + remove_mc_set.append("tmpf_mc_avg2_set") + remove_mc_set.append("tmpf_mc_avgx2_set") + remove_mc_set.append("tmpf_mc_avgsec_var") + + for k in remove_mc_set: if k in self: del self[k] - pass - def average_single_ended( + def average_double_ended( self, p_val="p_val", p_cov="p_cov", st_var=None, ast_var=None, + rst_var=None, + rast_var=None, conf_ints=None, mc_sample_size=100, ci_avg_time_flag1=False, @@ -2995,31 +2972,29 @@ def average_single_ended( ci_avg_x_flag2=False, ci_avg_x_sel=None, ci_avg_x_isel=None, - var_only_sections=None, da_random_state=None, mc_remove_set_flag=True, reduce_memory_usage=False, **kwargs, ): """ - Average temperatures from single-ended setups. + Average temperatures from double-ended setups. Four types of averaging are implemented. Please see Example Notebook 16. - Parameters ---------- p_val : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size 2 + `nt`. First value is :math:`\gamma`, - second is :math:`\Delta \\alpha`, others are :math:`C` for each + for calibration. Has size 2 + `nt`. First value is :math:`\\gamma`, + second is :math:`\\Delta \\alpha`, others are :math:`C` for each timestep. If set to False, no uncertainty in the parameters is propagated into the confidence intervals. Similar to the spec sheets of the DTS manufacturers. And similar to passing an array filled with zeros p_cov : array-like, optional The covariances of `p_val`. - st_var, ast_var : float, callable, array-like, optional + st_var, ast_var, rst_var, rast_var : float, callable, array-like, optional The variance of the measurement noise of the Stokes signals in the forward direction. If `float` the variance of the noise from the Stokes detector is described with a single value. @@ -3103,12 +3078,6 @@ def average_single_ended( ci_avg_x_isel : iterable of int Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a selection of the data - var_only_sections : bool - useful if using the ci_avg_x_flag. Only calculates the var over the - sections, so that the values can be compared with accuracy along the - reference sections. Where the accuracy is the variance of the - residuals between the estimated temperature and temperature of the - water baths. da_random_state For testing purposes. Similar to random seed. The seed for dask. Makes random not so random. 
To produce reproducable results for @@ -3123,16 +3092,62 @@ def average_single_ended( ------- """ - self.check_deprecated_kwargs(kwargs) - if var_only_sections is not None: - raise NotImplementedError() + def create_da_ta2(no, i_splice, direction="fw", chunks=None): + """create mask array mc, o, nt""" - self.conf_int_single_ended( + if direction == "fw": + arr = da.concatenate( + ( + da.zeros( + (1, i_splice, 1), chunks=((1, i_splice, 1)), dtype=bool + ), + da.ones( + (1, no - i_splice, 1), + chunks=(1, no - i_splice, 1), + dtype=bool, + ), + ), + axis=1, + ).rechunk((1, chunks[1], 1)) + else: + arr = da.concatenate( + ( + da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), + da.zeros( + (1, no - i_splice, 1), + chunks=((1, no - i_splice, 1)), + dtype=bool, + ), + ), + axis=1, + ).rechunk((1, chunks[1], 1)) + return arr + + check_deprecated_kwargs(kwargs) + + if (ci_avg_x_flag1 or ci_avg_x_flag2) and ( + ci_avg_time_flag1 or ci_avg_time_flag2 + ): + raise NotImplementedError( + "Incompatible flags. Can not pick " "the right chunks" + ) + + elif not ( + ci_avg_x_flag1 or ci_avg_x_flag2 or ci_avg_time_flag1 or ci_avg_time_flag2 + ): + raise NotImplementedError("Pick one of the averaging options") + + else: + pass + + self.conf_int_double_ended( p_val=p_val, p_cov=p_cov, st_var=st_var, ast_var=ast_var, + rst_var=rst_var, + rast_var=rast_var, conf_ints=None, mc_sample_size=mc_sample_size, da_random_state=da_random_state, @@ -3141,208 +3156,344 @@ def average_single_ended( **kwargs, ) - if ci_avg_time_sel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - self.coords[time_dim2] = ( - (time_dim2,), - self["time"].sel(**{"time": ci_avg_time_sel}).data, - ) - self["tmpf_avgsec"] = ( - ("x", time_dim2), - self["tmpf"].sel(**{"time": ci_avg_time_sel}).data, - ) - self["tmpf_mc_set"] = ( - ("mc", "x", time_dim2), - self["tmpf" + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, - ) - - elif ci_avg_time_isel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - self.coords[time_dim2] = ( - (time_dim2,), - self["time"].isel(**{"time": ci_avg_time_isel}).data, - ) - self["tmpf_avgsec"] = ( - ("x", time_dim2), - self["tmpf"].isel(**{"time": ci_avg_time_isel}).data, - ) - self["tmpf_mc_set"] = ( - ("mc", "x", time_dim2), - self["tmpf" + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, - ) + for label in ["tmpf", "tmpb"]: + if ci_avg_time_sel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + self.coords[time_dim2] = ( + (time_dim2,), + self["time"].sel(**{"time": ci_avg_time_sel}).data, + ) + self[label + "_avgsec"] = ( + ("x", time_dim2), + self[label].sel(**{"time": ci_avg_time_sel}).data, + ) + self[label + "_mc_set"] = ( + ("mc", "x", time_dim2), + self[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, + ) - elif ci_avg_x_sel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self["tmpf_avgsec"] = ( - (x_dim2, "time"), - self["tmpf"].sel(x=ci_avg_x_sel).data, - ) - self["tmpf_mc_set"] = ( - ("mc", x_dim2, "time"), - self["tmpf_mc_set"].sel(x=ci_avg_x_sel).data, - ) + elif ci_avg_time_isel is not None: + time_dim2 = "time" + "_avg" + x_dim2 = "x" + self.coords[time_dim2] = ( + (time_dim2,), + self["time"].isel(**{"time": ci_avg_time_isel}).data, + ) + self[label + "_avgsec"] = ( + ("x", time_dim2), + self[label].isel(**{"time": ci_avg_time_isel}).data, + ) + self[label + "_mc_set"] = ( + ("mc", "x", time_dim2), + self[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, + ) - 
elif ci_avg_x_isel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self["tmpf_avgsec"] = ( - (x_dim2, time_dim2), - self["tmpf"].isel(x=ci_avg_x_isel).data, - ) - self["tmpf_mc_set"] = ( - ("mc", x_dim2, time_dim2), - self["tmpf_mc_set"].isel(x=ci_avg_x_isel).data, - ) - else: - self["tmpf_avgsec"] = self["tmpf"] - x_dim2 = "x" - time_dim2 = "time" + elif ci_avg_x_sel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) + self[label + "_avgsec"] = ( + (x_dim2, "time"), + self[label].sel(x=ci_avg_x_sel).data, + ) + self[label + "_mc_set"] = ( + ("mc", x_dim2, "time"), + self[label + "_mc_set"].sel(x=ci_avg_x_sel).data, + ) - # subtract the mean temperature - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] - self["tmpf_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + elif ci_avg_x_isel is not None: + time_dim2 = "time" + x_dim2 = "x_avg" + self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) + self[label + "_avgsec"] = ( + (x_dim2, time_dim2), + self[label].isel(x=ci_avg_x_isel).data, + ) + self[label + "_mc_set"] = ( + ("mc", x_dim2, time_dim2), + self[label + "_mc_set"].isel(x=ci_avg_x_isel).data, + ) + else: + self[label + "_avgsec"] = self[label] + x_dim2 = "x" + time_dim2 = "time" - if ci_avg_x_flag1: - # unweighted mean - self["tmpf_avgx1"] = self["tmpf" + "_avgsec"].mean(dim=x_dim2) + memchunk = self[label + "_mc_set"].chunks - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] - qvar = q.var(dim=["mc", x_dim2], ddof=1) - self["tmpf_mc_avgx1_var"] = qvar + # subtract the mean temperature + q = self[label + "_mc_set"] - self[label + "_avgsec"] + self[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) - if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[2]) - avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", x_dim2]) - q = self["tmpf_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dim is added as firsaxis + if ci_avg_x_flag1: + # unweighted mean + self[label + "_avgx1"] = self[label + "_avgsec"].mean(dim=x_dim2) - self["tmpf_mc_avgx1"] = (("CI", time_dim2), q) + q = self[label + "_mc_set"] - self[label + "_avgsec"] + qvar = q.var(dim=["mc", x_dim2], ddof=1) + self[label + "_mc_avgx1_var"] = qvar - if ci_avg_x_flag2: - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + if conf_ints: + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) + avg_axis = self[label + "_mc_set"].get_axis_num(["mc", x_dim2]) + q = self[label + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis - qvar = q.var(dim=["mc"], ddof=1) + self[label + "_mc_avgx1"] = (("CI", time_dim2), q) - # Inverse-variance weighting - avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) + if ci_avg_x_flag2: + q = self[label + "_mc_set"] - self[label + "_avgsec"] - self["tmpf_mc_avgx2_var"] = avg_x_var + qvar = q.var(dim=["mc"], ddof=1) - self["tmpf" + "_mc_avgx2_set"] = (self["tmpf_mc_set"] / qvar).sum( - dim=x_dim2 - ) * avg_x_var - self["tmpf" + "_avgx2"] = self["tmpf" + "_mc_avgx2_set"].mean(dim="mc") + # Inverse-variance weighting + avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) - if conf_ints: - new_chunks = 
(len(conf_ints), self["tmpf_mc_set"].chunks[2]) - avg_axis_avgx = self["tmpf_mc_set"].get_axis_num("mc") + self[label + "_mc_avgx2_var"] = avg_x_var - qq = self["tmpf_mc_avgx2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), - chunks=new_chunks, # - drop_axis=avg_axis_avgx, - # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - self["tmpf_mc_avgx2"] = (("CI", time_dim2), qq) + self[label + "_mc_avgx2_set"] = (self[label + "_mc_set"] / qvar).sum( + dim=x_dim2 + ) * avg_x_var + self[label + "_avgx2"] = self[label + "_mc_avgx2_set"].mean(dim="mc") - if ci_avg_time_flag1 is not None: - # unweighted mean - self["tmpf_avg1"] = self["tmpf" + "_avgsec"].mean(dim=time_dim2) + if conf_ints: + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) + avg_axis_avgx = self[label + "_mc_set"].get_axis_num("mc") - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] - qvar = q.var(dim=["mc", time_dim2], ddof=1) - self["tmpf_mc_avg1_var"] = qvar + qq = self[label + "_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), + chunks=new_chunks, # + drop_axis=avg_axis_avgx, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + self[label + "_mc_avgx2"] = (("CI", time_dim2), qq) + + if ci_avg_time_flag1 is not None: + # unweighted mean + self[label + "_avg1"] = self[label + "_avgsec"].mean(dim=time_dim2) + + q = self[label + "_mc_set"] - self[label + "_avgsec"] + qvar = q.var(dim=["mc", time_dim2], ddof=1) + self[label + "_mc_avg1_var"] = qvar + + if conf_ints: + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) + avg_axis = self[label + "_mc_set"].get_axis_num(["mc", time_dim2]) + q = self[label + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, + # avg dimensions are dropped from input arr + new_axis=0, + ) # The new CI dim is added as firsaxis + + self[label + "_mc_avg1"] = (("CI", x_dim2), q) + + if ci_avg_time_flag2: + q = self[label + "_mc_set"] - self[label + "_avgsec"] + + qvar = q.var(dim=["mc"], ddof=1) + + # Inverse-variance weighting + avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) + + self[label + "_mc_avg2_var"] = avg_time_var + + self[label + "_mc_avg2_set"] = (self[label + "_mc_set"] / qvar).sum( + dim=time_dim2 + ) * avg_time_var + self[label + "_avg2"] = self[label + "_mc_avg2_set"].mean(dim="mc") + + if conf_ints: + new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) + avg_axis_avg2 = self[label + "_mc_set"].get_axis_num("mc") + + qq = self[label + "_mc_avg2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), + chunks=new_chunks, # + drop_axis=avg_axis_avg2, + # avg dimensions are dropped from input arr + new_axis=0, + dtype=float, + ) # The new CI dimension is added as + # firsaxis + self[label + "_mc_avg2"] = (("CI", x_dim2), qq) + + # Weighted mean of the forward and backward + tmpw_var = 1 / ( + 1 / self["tmpf_mc" + "_avgsec_var"] + 1 / self["tmpb_mc" + "_avgsec_var"] + ) + + q = ( + self["tmpf_mc_set"] / self["tmpf_mc" + "_avgsec_var"] + + self["tmpb_mc_set"] / self["tmpb_mc" + "_avgsec_var"] + ) * tmpw_var + + self["tmpw" + "_mc_set"] = q # + + # self["tmpw"] = self["tmpw" + '_mc_set'].mean(dim='mc') + self["tmpw" + "_avgsec"] = ( + self["tmpf_avgsec"] / self["tmpf_mc" + "_avgsec_var"] + + self["tmpb_avgsec"] / 
self["tmpb_mc" + "_avgsec_var"] + ) * tmpw_var + + q = self["tmpw" + "_mc_set"] - self["tmpw" + "_avgsec"] + self["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + + if ci_avg_time_flag1: + self["tmpw" + "_avg1"] = self["tmpw" + "_avgsec"].mean(dim=time_dim2) + + self["tmpw" + "_mc_avg1_var"] = self["tmpw" + "_mc_set"].var( + dim=["mc", time_dim2] + ) if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1]) - avg_axis = self["tmpf_mc_set"].get_axis_num(["mc", time_dim2]) - q = self["tmpf_mc_set"].data.map_blocks( + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) + avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) + q2 = self["tmpw" + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, - ) # The new CI dim is added as firsaxis - - self["tmpf_mc_avg1"] = (("CI", x_dim2), q) + dtype=float, + ) # The new CI dimension is added as + # first axis + self["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) if ci_avg_time_flag2: - q = self["tmpf_mc_set"] - self["tmpf_avgsec"] + tmpw_var_avg2 = 1 / ( + 1 / self["tmpf_mc_avg2_var"] + 1 / self["tmpb_mc_avg2_var"] + ) - qvar = q.var(dim=["mc"], ddof=1) + q = ( + self["tmpf_mc_avg2_set"] / self["tmpf_mc_avg2_var"] + + self["tmpb_mc_avg2_set"] / self["tmpb_mc_avg2_var"] + ) * tmpw_var_avg2 - # Inverse-variance weighting - avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) + self["tmpw" + "_mc_avg2_set"] = q # - self["tmpf_mc_avg2_var"] = avg_time_var + self["tmpw" + "_avg2"] = ( + self["tmpf_avg2"] / self["tmpf_mc_avg2_var"] + + self["tmpb_avg2"] / self["tmpb_mc_avg2_var"] + ) * tmpw_var_avg2 - self["tmpf" + "_mc_avg2_set"] = (self["tmpf_mc_set"] / qvar).sum( - dim=time_dim2 - ) * avg_time_var - self["tmpf_avg2"] = self["tmpf" + "_mc_avg2_set"].mean(dim="mc") + self["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 if conf_ints: - new_chunks = (len(conf_ints), self["tmpf_mc_set"].chunks[1]) - avg_axis_avg2 = self["tmpf_mc_set"].get_axis_num("mc") - - qq = self["tmpf_mc_avg2_set"].data.map_blocks( + # We first need to know the x-dim-chunk-size + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) + avg_axis_avg2 = self["tmpw" + "_mc_avg2_set"].get_axis_num("mc") + q2 = self["tmpw" + "_mc_avg2_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), - chunks=new_chunks, # - drop_axis=avg_axis_avg2, - # avg dimensions are dropped from input arr + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis_avg2, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as firstax + self["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) + + if ci_avg_x_flag1: + self["tmpw" + "_avgx1"] = self["tmpw" + "_avgsec"].mean(dim=x_dim2) + + self["tmpw" + "_mc_avgx1_var"] = self["tmpw" + "_mc_set"].var(dim=x_dim2) + + if conf_ints: + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) + avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) + q2 = self["tmpw" + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis, # avg dimensions are dropped new_axis=0, dtype=float, ) # The new CI dimension is added as - # firsaxis - self["tmpf_mc_avg2"] = (("CI", x_dim2), 
qq) + # first axis + self["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), q2) + + if ci_avg_x_flag2: + tmpw_var_avgx2 = 1 / ( + 1 / self["tmpf_mc_avgx2_var"] + 1 / self["tmpb_mc_avgx2_var"] + ) + + q = ( + self["tmpf_mc_avgx2_set"] / self["tmpf_mc_avgx2_var"] + + self["tmpb_mc_avgx2_set"] / self["tmpb_mc_avgx2_var"] + ) * tmpw_var_avgx2 + + self["tmpw" + "_mc_avgx2_set"] = q # + + self["tmpw" + "_avgx2"] = ( + self["tmpf_avgx2"] / self["tmpf_mc_avgx2_var"] + + self["tmpb_avgx2"] / self["tmpb_mc_avgx2_var"] + ) * tmpw_var_avgx2 + + self["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 + + if conf_ints: + # We first need to know the x-dim-chunk-size + new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) + avg_axis_avgx2 = self["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") + q2 = self["tmpw" + "_mc_avgx2_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx2), + chunks=new_chunks_weighted, + # Explicitly define output chunks + drop_axis=avg_axis_avgx2, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as firstax + self["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) + # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: remove_mc_set = [ "r_st", "r_ast", + "r_rst", + "r_rast", "gamma_mc", - "dalpha_mc", - "c_mc", + "alpha_mc", + "df_mc", + "db_mc", "x_avg", "time_avg", "mc", - "ta_mc_arr", ] - remove_mc_set.append("tmpf_avgsec") - remove_mc_set.append("tmpf_mc_set") - remove_mc_set.append("tmpf_mc_avg2_set") - remove_mc_set.append("tmpf_mc_avgx2_set") - remove_mc_set.append("tmpf_mc_avgsec_var") + + for i in ["tmpf", "tmpb", "tmpw"]: + remove_mc_set.append(i + "_avgsec") + remove_mc_set.append(i + "_mc_set") + remove_mc_set.append(i + "_mc_avg2_set") + remove_mc_set.append(i + "_mc_avgx2_set") + remove_mc_set.append(i + "_mc_avgsec_var") + + if self.trans_att.size: + remove_mc_set.append('talpha"_fw_mc') + remove_mc_set.append('talpha"_bw_mc') for k in remove_mc_set: if k in self: del self[k] pass - def conf_int_double_ended( + def conf_int_single_ended( self, p_val="p_val", p_cov="p_cov", st_var=None, ast_var=None, - rst_var=None, - rast_var=None, conf_ints=None, mc_sample_size=100, - var_only_sections=False, da_random_state=None, mc_remove_set_flag=True, reduce_memory_usage=False, @@ -3350,84 +3501,37 @@ def conf_int_double_ended( ): r""" Estimation of the confidence intervals for the temperatures measured - with a double-ended setup. - Double-ended setups require four additional steps to estimate the - confidence intervals for the temperature. First, the variances of the - Stokes and anti-Stokes intensity measurements of the forward and - backward channels are estimated following the steps in - Section 4 [1]_. See `ds.variance_stokes_constant()`. - A Normal distribution is assigned to each - intensity measurement that is centered at the measurement and using the - estimated variance. Second, a multi-variate Normal distribution is - assigned to the estimated parameters using the covariance matrix from - the calibration procedure presented in Section 6 [1]_ (`p_cov`). Third, - Normal distributions are assigned for :math:`A` (`ds.alpha`) - for each location - outside of the reference sections. These distributions are centered - around :math:`A_p` and have variance :math:`\sigma^2\left[A_p\\right]` - given by Equations 44 and 45. Fourth, the distributions are sampled - and :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` are - computed with Equations 16 and 17, respectively. 
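The weighted "tmpw" estimate computed above is a plain inverse-variance combination of the forward and backward Monte Carlo sets, matching the weighted-average equations in this docstring. A minimal sketch of that weighting with made-up values; `tf`, `tb` and their variances are illustrative names, not taken from the patch:

    import numpy as np

    tf, tb = np.array([20.1, 20.4]), np.array([19.9, 20.2])      # forward / backward estimates
    tf_var, tb_var = np.array([0.04, 0.09]), np.array([0.01, 0.01])

    tw_var = 1 / (1 / tf_var + 1 / tb_var)        # combined variance of the weighted mean
    tw = (tf / tf_var + tb / tb_var) * tw_var     # inverse-variance weighted best estimate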
Fifth, step four is repeated to - compute, e.g., 10,000 realizations (`mc_sample_size`) of :math:`T_{\mathrm{F},m,n}` and - :math:`T_{\mathrm{B},m,n}` to approximate their probability density - functions. Sixth, the standard uncertainties of - :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` - (:math:`\sigma\left[T_{\mathrm{F},m,n}\\right]` and - :math:`\sigma\left[T_{\mathrm{B},m,n}\\right]`) are estimated with the - standard deviation of their realizations. Seventh, for each realization - :math:`i` the temperature :math:`T_{m,n,i}` is computed as the weighted - average of :math:`T_{\mathrm{F},m,n,i}` and - :math:`T_{\mathrm{B},m,n,i}`: - - .. math:: - - T_{m,n,i} =\ - \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n,i}}{\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right]} +\ - \\frac{T_{\mathrm{B},m,n,i}}{\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) - - where - - .. math:: - - \sigma^2\left[T_{m,n}\\right] = \\frac{1}{1 /\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right] + 1 /\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]} - - The best estimate of the temperature :math:`T_{m,n}` is computed - directly from the best estimates of :math:`T_{\mathrm{F},m,n}` and - :math:`T_{\mathrm{B},m,n}` as: - - .. math:: - T_{m,n} =\ - \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n}}{\ - \sigma^2\left[T_{\mathrm{F},m,n}\\right]} + \\frac{T_{\mathrm{B},m,n}}{\ - \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) - - Alternatively, the best estimate of :math:`T_{m,n}` can be approximated - with the mean of the :math:`T_{m,n,i}` values. Finally, the 95\% - confidence interval for :math:`T_{m,n}` are estimated with the 2.5\% and - 97.5\% percentiles of :math:`T_{m,n,i}`. + with a single-ended setup. It consists of five steps. First, the variances + of the Stokes and anti-Stokes intensity measurements are estimated + following the steps in Section 4 [1]_. A Normal + distribution is assigned to each intensity measurement that is centered + at the measurement and using the estimated variance. Second, a multi- + variate Normal distribution is assigned to the estimated parameters + using the covariance matrix from the calibration procedure presented in + Section 5 [1]_. Third, the distributions are sampled, and the + temperature is computed with Equation 12 [1]_. Fourth, step + three is repeated, e.g., 10,000 times for each location and for each + time. The resulting 10,000 realizations of the temperatures + approximate the probability density functions of the estimated + temperature at that location and time. Fifth, the standard uncertainties + are computed with the standard deviations of the realizations of the + temperatures, and the 95\% confidence intervals are computed from the + 2.5\% and 97.5\% percentiles of the realizations of the temperatures. - Assumes sections are set. Parameters ---------- p_val : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size `1 + 2 * nt + nx + 2 * nt * nta`. - First value is :math:`\gamma`, then `nt` times - :math:`D_\mathrm{F}`, then `nt` times - :math:`D_\mathrm{B}`, then for each location :math:`D_\mathrm{B}`, - then for each connector that introduces directional attenuation two - parameters per time step. - p_cov : array-like, optional - The covariances of `p_val`. Square matrix. + for calibration. Has size 2 + `nt`. First value is :math:`\gamma`, + second is :math:`\Delta \\alpha`, others are :math:`C` for each + timestep. 
If set to False, no uncertainty in the parameters is propagated into the confidence intervals. Similar to the spec sheets of the DTS - manufacturers. And similar to passing an array filled with zeros. - st_var, ast_var, rst_var, rast_var : float, callable, array-like, optional + manufacturers. And similar to passing an array filled with zeros + p_cov : array-like, optional + The covariances of `p_val`. + st_var, ast_var : float, callable, array-like, optional The variance of the measurement noise of the Stokes signals in the forward direction. If `float` the variance of the noise from the Stokes detector is described with a single value. @@ -3438,16 +3542,11 @@ def conf_int_double_ended( x. Required if method is wls. conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid - values are between [0, 1]. + values are between + [0, 1]. mc_sample_size : int Size of the monte carlo parameter set used to calculate the confidence interval - var_only_sections : bool - useful if using the ci_avg_x_flag. Only calculates the var over the - sections, so that the values can be compared with accuracy along the - reference sections. Where the accuracy is the variance of the - residuals between the estimated temperature and temperature of the - water baths. da_random_state For testing purposes. Similar to random seed. The seed for dask. Makes random not so random. To produce reproducable results for @@ -3458,8 +3557,6 @@ def conf_int_double_ended( reduce_memory_usage : bool Use less memory but at the expense of longer computation time - Returns - ------- References ---------- @@ -3467,61 +3564,61 @@ def conf_int_double_ended( of Temperature and Associated Uncertainty from Fiber-Optic Raman- Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. 
https://doi.org/10.3390/s20082235 - """ - - def create_da_ta2(no, i_splice, direction="fw", chunks=None): - """create mask array mc, o, nt""" - - if direction == "fw": - arr = da.concatenate( - ( - da.zeros( - (1, i_splice, 1), chunks=((1, i_splice, 1)), dtype=bool - ), - da.ones( - (1, no - i_splice, 1), - chunks=(1, no - i_splice, 1), - dtype=bool, - ), - ), - axis=1, - ).rechunk((1, chunks[1], 1)) - else: - arr = da.concatenate( - ( - da.ones((1, i_splice, 1), chunks=(1, i_splice, 1), dtype=bool), - da.zeros( - (1, no - i_splice, 1), - chunks=((1, no - i_splice, 1)), - dtype=bool, - ), - ), - axis=1, - ).rechunk((1, chunks[1], 1)) - return arr - - self.check_deprecated_kwargs(kwargs) + check_deprecated_kwargs(kwargs) if da_random_state: - # In testing environments - assert isinstance(da_random_state, da.random.RandomState) state = da_random_state else: state = da.random.RandomState() + no, nt = self.st.data.shape + if "trans_att" in self.keys(): + nta = self.trans_att.size + else: + nta = 0 + + assert isinstance(p_val, (str, np.ndarray, np.generic)) + if isinstance(p_val, str): + p_val = self[p_val].data + + npar = p_val.size + + # number of parameters + if npar == nt + 2 + nt * nta: + fixed_alpha = False + elif npar == 1 + no + nt + nt * nta: + fixed_alpha = True + else: + raise Exception("The size of `p_val` is not what I expected") + + self.coords["mc"] = range(mc_sample_size) + if conf_ints: - assert "tmpw", ( - "Current implementation requires you to " - 'define "tmpw" when estimating confidence ' - "intervals" - ) + self.coords["CI"] = conf_ints - no, nt = self.st.shape - nta = self.trans_att.size - npar = 1 + 2 * nt + no + nt * 2 * nta # number of parameters + # WLS + if isinstance(p_cov, str): + p_cov = self[p_cov].data + assert p_cov.shape == (npar, npar) - rsize = (mc_sample_size, no, nt) + p_mc = sst.multivariate_normal.rvs(mean=p_val, cov=p_cov, size=mc_sample_size) + + if fixed_alpha: + self["alpha_mc"] = (("mc", "x"), p_mc[:, 1 : no + 1]) + self["c_mc"] = (("mc", "time"), p_mc[:, 1 + no : 1 + no + nt]) + else: + self["dalpha_mc"] = (("mc",), p_mc[:, 1]) + self["c_mc"] = (("mc", "time"), p_mc[:, 2 : nt + 2]) + + self["gamma_mc"] = (("mc",), p_mc[:, 0]) + if nta: + self["ta_mc"] = ( + ("mc", "trans_att", "time"), + np.reshape(p_mc[:, -nt * nta :], (mc_sample_size, nta, nt)), + ) + + rsize = (self.mc.size, self.x.size, self.time.size) if reduce_memory_usage: memchunk = da.ones( @@ -3532,355 +3629,224 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} ).chunks - self.coords["mc"] = range(mc_sample_size) - if conf_ints: - self.coords["CI"] = conf_ints + # Draw from the normal distributions for the Stokes intensities + for k, st_labeli, st_vari in zip( + ["r_st", "r_ast"], ["st", "ast"], [st_var, ast_var] + ): + # Load the mean as chunked Dask array, otherwise eats memory + if type(self[st_labeli].data) == da.core.Array: + loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) + else: + loc = da.from_array(self[st_labeli].data, chunks=memchunk[1:]) - assert isinstance(p_val, (str, np.ndarray, np.generic)) - if isinstance(p_val, str): - p_val = self[p_val].values - assert p_val.shape == (npar,), ( - "Did you set 'talpha' as " - "keyword argument of the " - "conf_int_double_ended() function?" 
- ) + # Make sure variance is of size (no, nt) + if np.size(st_vari) > 1: + if st_vari.shape == self[st_labeli].shape: + pass + else: + st_vari = np.broadcast_to(st_vari, (no, nt)) + else: + pass - assert isinstance(p_cov, (str, np.ndarray, np.generic, bool)) + # Load variance as chunked Dask array, otherwise eats memory + if type(st_vari) == da.core.Array: + st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - if isinstance(p_cov, bool) and not p_cov: - # Exclude parameter uncertainty if p_cov == False - gamma = p_val[0] - d_fw = p_val[1 : nt + 1] - d_bw = p_val[1 + nt : 2 * nt + 1] - alpha = p_val[2 * nt + 1 : 2 * nt + 1 + no] + elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: + st_vari_da = da.asarray( + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) - self["gamma_mc"] = (tuple(), gamma) - self["alpha_mc"] = (("x",), alpha) - self["df_mc"] = (("time",), d_fw) - self["db_mc"] = (("time",), d_bw) + elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: + st_vari_da = da.from_array( + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) - if nta: - ta = p_val[2 * nt + 1 + no :].reshape((nt, 2, nta), order="F") - ta_fw = ta[:, 0, :] - ta_bw = ta[:, 1, :] + else: + st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) - ta_fw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_fw.T, self.coords["trans_att"].values): - ta_fw_arr[self.x.values >= taxi] = ( - ta_fw_arr[self.x.values >= taxi] + tai - ) + self[k] = ( + ("mc", "x", "time"), + state.normal( + loc=loc, # has chunks=memchunk[1:] + scale=st_vari_da**0.5, + size=rsize, + chunks=memchunk, + ), + ) - ta_bw_arr = np.zeros((no, nt)) - for tai, taxi in zip(ta_bw.T, self.coords["trans_att"].values): - ta_bw_arr[self.x.values < taxi] = ( - ta_bw_arr[self.x.values < taxi] + tai + ta_arr = np.zeros((mc_sample_size, no, nt)) + + if nta: + for ii, ta in enumerate(self["ta_mc"]): + for tai, taxi in zip(ta.values, self.trans_att.values): + ta_arr[ii, self.x.values >= taxi] = ( + ta_arr[ii, self.x.values >= taxi] + tai ) + self["ta_mc_arr"] = (("mc", "x", "time"), ta_arr) - self["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) - self["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) - - elif isinstance(p_cov, bool) and p_cov: - raise NotImplementedError("Not an implemented option. 
Check p_cov argument") - - else: - # WLS - if isinstance(p_cov, str): - p_cov = self[p_cov].values - assert p_cov.shape == (npar, npar) - - ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") - nx_sec = ix_sec.size - from_i = np.concatenate( - ( - np.arange(1 + 2 * nt), - 1 + 2 * nt + ix_sec, - np.arange(1 + 2 * nt + no, 1 + 2 * nt + no + nt * 2 * nta), + if fixed_alpha: + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + ( + np.log(self["r_st"]) + - np.log(self["r_ast"]) + + (self["c_mc"] + self["ta_mc_arr"]) + ) + + self["alpha_mc"] ) + - 273.15 ) - iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") - po_val = p_val[from_i] - po_cov = p_cov[iox_sec1, iox_sec2] - - po_mc = sst.multivariate_normal.rvs( - mean=po_val, cov=po_cov, size=mc_sample_size - ) - - gamma = po_mc[:, 0] - d_fw = po_mc[:, 1 : nt + 1] - d_bw = po_mc[:, 1 + nt : 2 * nt + 1] - - self["gamma_mc"] = (("mc",), gamma) - self["df_mc"] = (("mc", "time"), d_fw) - self["db_mc"] = (("mc", "time"), d_bw) - - # calculate alpha seperately - alpha = np.zeros((mc_sample_size, no), dtype=float) - alpha[:, ix_sec] = po_mc[:, 1 + 2 * nt : 1 + 2 * nt + nx_sec] - - not_ix_sec = np.array([i for i in range(no) if i not in ix_sec]) - - if np.any(not_ix_sec): - not_alpha_val = p_val[2 * nt + 1 + not_ix_sec] - not_alpha_var = p_cov[2 * nt + 1 + not_ix_sec, 2 * nt + 1 + not_ix_sec] - - not_alpha_mc = np.random.normal( - loc=not_alpha_val, - scale=not_alpha_var**0.5, - size=(mc_sample_size, not_alpha_val.size), - ) - - alpha[:, not_ix_sec] = not_alpha_mc - - self["alpha_mc"] = (("mc", "x"), alpha) - - if nta: - ta = po_mc[:, 2 * nt + 1 + nx_sec :].reshape( - (mc_sample_size, nt, 2, nta), order="F" - ) - ta_fw = ta[:, :, 0, :] - ta_bw = ta[:, :, 1, :] - - ta_fw_arr = da.zeros( - (mc_sample_size, no, nt), chunks=memchunk, dtype=float - ) - for tai, taxi in zip( - ta_fw.swapaxes(0, 2), self.coords["trans_att"].values - ): - # iterate over the splices - i_splice = sum(self.x.values < taxi) - mask = create_da_ta2(no, i_splice, direction="fw", chunks=memchunk) - - ta_fw_arr += mask * tai.T[:, None, :] - - ta_bw_arr = da.zeros( - (mc_sample_size, no, nt), chunks=memchunk, dtype=float - ) - for tai, taxi in zip( - ta_bw.swapaxes(0, 2), self.coords["trans_att"].values - ): - i_splice = sum(self.x.values < taxi) - mask = create_da_ta2(no, i_splice, direction="bw", chunks=memchunk) - - ta_bw_arr += mask * tai.T[:, None, :] - - self["talpha_fw_mc"] = (("mc", "x", "time"), ta_fw_arr) - self["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) - - # Draw from the normal distributions for the Stokes intensities - for k, st_labeli, st_vari in zip( - ["r_st", "r_ast", "r_rst", "r_rast"], - ["st", "ast", "rst", "rast"], - [st_var, ast_var, rst_var, rast_var], - ): - # Load the mean as chunked Dask array, otherwise eats memory - if type(self[st_labeli].data) == da.core.Array: - loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) - else: - loc = da.from_array(self[st_labeli].data, chunks=memchunk[1:]) - - # Make sure variance is of size (no, nt) - if np.size(st_vari) > 1: - if st_vari.shape == self[st_labeli].shape: - pass - else: - st_vari = np.broadcast_to(st_vari, (no, nt)) - else: - pass - - # Load variance as chunked Dask array, otherwise eats memory - if type(st_vari) == da.core.Array: - st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - - elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: - st_vari_da = da.asarray( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] - ) - - elif callable(st_vari) and 
type(self[st_labeli].data) != da.core.Array: - st_vari_da = da.from_array( - st_vari(self[st_labeli]).data, chunks=memchunk[1:] + else: + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + ( + np.log(self["r_st"]) + - np.log(self["r_ast"]) + + (self["c_mc"] + self["ta_mc_arr"]) + ) + + (self["dalpha_mc"] * self.x) ) + - 273.15 + ) - else: - st_vari_da = da.from_array(st_vari, chunks=memchunk[1:]) + avg_dims = ["mc"] - self[k] = ( - ("mc", "x", "time"), - state.normal( - loc=loc, # has chunks=memchunk[1:] - scale=st_vari_da**0.5, - size=rsize, - chunks=memchunk, - ), - ) + avg_axis = self["tmpf_mc_set"].get_axis_num(avg_dims) - for label in ["tmpf", "tmpb"]: - if "tmpw" or label: - if label == "tmpf": - if nta: - self["tmpf_mc_set"] = ( - self["gamma_mc"] - / ( - np.log(self["r_st"] / self["r_ast"]) - + self["df_mc"] - + self["alpha_mc"] - + self["talpha_fw_mc"] - ) - - 273.15 - ) - else: - self["tmpf_mc_set"] = ( - self["gamma_mc"] - / ( - np.log(self["r_st"] / self["r_ast"]) - + self["df_mc"] - + self["alpha_mc"] - ) - - 273.15 - ) - else: - if nta: - self["tmpb_mc_set"] = ( - self["gamma_mc"] - / ( - np.log(self["r_rst"] / self["r_rast"]) - + self["db_mc"] - - self["alpha_mc"] - + self["talpha_bw_mc"] - ) - - 273.15 - ) - else: - self["tmpb_mc_set"] = ( - self["gamma_mc"] - / ( - np.log(self["r_rst"] / self["r_rast"]) - + self["db_mc"] - - self["alpha_mc"] - ) - - 273.15 - ) + self["tmpf_mc_var"] = (self["tmpf_mc_set"] - self["tmpf"]).var( + dim=avg_dims, ddof=1 + ) - if var_only_sections: - # sets the values outside the reference sections to NaN - xi = self.ufunc_per_section(x_indices=True, calc_per="all") - x_mask_ = [True if ix in xi else False for ix in range(self.x.size)] - x_mask = np.reshape(x_mask_, (1, -1, 1)) - self[label + "_mc_set"] = self[label + "_mc_set"].where(x_mask) + if conf_ints: + new_chunks = ((len(conf_ints),),) + self["tmpf_mc_set"].chunks[1:] - # subtract the mean temperature - q = self[label + "_mc_set"] - self[label] - self[label + "_mc_var"] = q.var(dim="mc", ddof=1) + qq = self["tmpf_mc_set"] - if conf_ints: - new_chunks = list(self[label + "_mc_set"].chunks) - new_chunks[0] = (len(conf_ints),) - avg_axis = self[label + "_mc_set"].get_axis_num("mc") - q = self[label + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dimension is added as firsaxis + q = qq.data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks, # + drop_axis=avg_axis, # avg dimesnions are dropped from input arr + new_axis=0, + ) # The new CI dimension is added as first axis - self[label + "_mc"] = (("CI", "x", "time"), q) + self["tmpf_mc"] = (("CI", "x", "time"), q) - # Weighted mean of the forward and backward - tmpw_var = 1 / (1 / self["tmpf_mc_var"] + 1 / self["tmpb_mc_var"]) + if mc_remove_set_flag: + drop_var = [ + "gamma_mc", + "dalpha_mc", + "alpha_mc", + "c_mc", + "mc", + "r_st", + "r_ast", + "tmpf_mc_set", + "ta_mc_arr", + ] + for k in drop_var: + if k in self: + del self[k] - q = ( - self["tmpf_mc_set"] / self["tmpf_mc_var"] - + self["tmpb_mc_set"] / self["tmpb_mc_var"] - ) * tmpw_var + pass - self["tmpw" + "_mc_set"] = q # + def conf_int_double_ended( + self, + p_val="p_val", + p_cov="p_cov", + st_var=None, + ast_var=None, + rst_var=None, + rast_var=None, + conf_ints=None, + mc_sample_size=100, + var_only_sections=False, + da_random_state=None, + mc_remove_set_flag=True, + 
reduce_memory_usage=False, + **kwargs, + ): + r""" + Estimation of the confidence intervals for the temperatures measured + with a double-ended setup. + Double-ended setups require four additional steps to estimate the + confidence intervals for the temperature. First, the variances of the + Stokes and anti-Stokes intensity measurements of the forward and + backward channels are estimated following the steps in + Section 4 [1]_. See `ds.variance_stokes_constant()`. + A Normal distribution is assigned to each + intensity measurement that is centered at the measurement and using the + estimated variance. Second, a multi-variate Normal distribution is + assigned to the estimated parameters using the covariance matrix from + the calibration procedure presented in Section 6 [1]_ (`p_cov`). Third, + Normal distributions are assigned for :math:`A` (`ds.alpha`) + for each location + outside of the reference sections. These distributions are centered + around :math:`A_p` and have variance :math:`\sigma^2\left[A_p\\right]` + given by Equations 44 and 45. Fourth, the distributions are sampled + and :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` are + computed with Equations 16 and 17, respectively. Fifth, step four is repeated to + compute, e.g., 10,000 realizations (`mc_sample_size`) of :math:`T_{\mathrm{F},m,n}` and + :math:`T_{\mathrm{B},m,n}` to approximate their probability density + functions. Sixth, the standard uncertainties of + :math:`T_{\mathrm{F},m,n}` and :math:`T_{\mathrm{B},m,n}` + (:math:`\sigma\left[T_{\mathrm{F},m,n}\\right]` and + :math:`\sigma\left[T_{\mathrm{B},m,n}\\right]`) are estimated with the + standard deviation of their realizations. Seventh, for each realization + :math:`i` the temperature :math:`T_{m,n,i}` is computed as the weighted + average of :math:`T_{\mathrm{F},m,n,i}` and + :math:`T_{\mathrm{B},m,n,i}`: - self["tmpw"] = ( - self["tmpf"] / self["tmpf_mc_var"] + self["tmpb"] / self["tmpb_mc_var"] - ) * tmpw_var + .. math:: - q = self["tmpw" + "_mc_set"] - self["tmpw"] - self["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) + T_{m,n,i} =\ + \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n,i}}{\ + \sigma^2\left[T_{\mathrm{F},m,n}\\right]} +\ + \\frac{T_{\mathrm{B},m,n,i}}{\ + \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) - # Calculate the CI of the weighted MC_set - if conf_ints: - new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] - avg_axis = self["tmpw" + "_mc_set"].get_axis_num("mc") - q2 = self["tmpw" + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks_weighted, # Explicitly define output chunks - drop_axis=avg_axis, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as first axis - self["tmpw" + "_mc"] = (("CI", "x", "time"), q2) + where - # Clean up the garbage. All arrays with a Monte Carlo dimension. - if mc_remove_set_flag: - remove_mc_set = [ - "r_st", - "r_ast", - "r_rst", - "r_rast", - "gamma_mc", - "alpha_mc", - "df_mc", - "db_mc", - ] + .. 
math:: - for i in ["tmpf", "tmpb", "tmpw"]: - remove_mc_set.append(i + "_mc_set") + \sigma^2\left[T_{m,n}\\right] = \\frac{1}{1 /\ + \sigma^2\left[T_{\mathrm{F},m,n}\\right] + 1 /\ + \sigma^2\left[T_{\mathrm{B},m,n}\\right]} - if nta: - remove_mc_set.append('talpha"_fw_mc') - remove_mc_set.append('talpha"_bw_mc') + The best estimate of the temperature :math:`T_{m,n}` is computed + directly from the best estimates of :math:`T_{\mathrm{F},m,n}` and + :math:`T_{\mathrm{B},m,n}` as: - for k in remove_mc_set: - if k in self: - del self[k] - pass + .. math:: + T_{m,n} =\ + \sigma^2\left[T_{m,n}\\right]\left({\\frac{T_{\mathrm{F},m,n}}{\ + \sigma^2\left[T_{\mathrm{F},m,n}\\right]} + \\frac{T_{\mathrm{B},m,n}}{\ + \sigma^2\left[T_{\mathrm{B},m,n}\\right]}}\\right) - def average_double_ended( - self, - p_val="p_val", - p_cov="p_cov", - st_var=None, - ast_var=None, - rst_var=None, - rast_var=None, - conf_ints=None, - mc_sample_size=100, - ci_avg_time_flag1=False, - ci_avg_time_flag2=False, - ci_avg_time_sel=None, - ci_avg_time_isel=None, - ci_avg_x_flag1=False, - ci_avg_x_flag2=False, - ci_avg_x_sel=None, - ci_avg_x_isel=None, - da_random_state=None, - mc_remove_set_flag=True, - reduce_memory_usage=False, - **kwargs, - ): - """ - Average temperatures from double-ended setups. + Alternatively, the best estimate of :math:`T_{m,n}` can be approximated + with the mean of the :math:`T_{m,n,i}` values. Finally, the 95\% + confidence interval for :math:`T_{m,n}` are estimated with the 2.5\% and + 97.5\% percentiles of :math:`T_{m,n,i}`. - Four types of averaging are implemented. Please see Example Notebook 16. + Assumes sections are set. Parameters ---------- p_val : array-like, optional Define `p_val`, `p_var`, `p_cov` if you used an external function - for calibration. Has size 2 + `nt`. First value is :math:`\\gamma`, - second is :math:`\\Delta \\alpha`, others are :math:`C` for each - timestep. + for calibration. Has size `1 + 2 * nt + nx + 2 * nt * nta`. + First value is :math:`\gamma`, then `nt` times + :math:`D_\mathrm{F}`, then `nt` times + :math:`D_\mathrm{B}`, then for each location :math:`D_\mathrm{B}`, + then for each connector that introduces directional attenuation two + parameters per time step. + p_cov : array-like, optional + The covariances of `p_val`. Square matrix. If set to False, no uncertainty in the parameters is propagated into the confidence intervals. Similar to the spec sheets of the DTS - manufacturers. And similar to passing an array filled with zeros - p_cov : array-like, optional - The covariances of `p_val`. + manufacturers. And similar to passing an array filled with zeros. st_var, ast_var, rst_var, rast_var : float, callable, array-like, optional The variance of the measurement noise of the Stokes signals in the forward direction. If `float` the variance of the noise from the @@ -3892,79 +3858,16 @@ def average_double_ended( x. Required if method is wls. conf_ints : iterable object of float A list with the confidence boundaries that are calculated. Valid - values are between - [0, 1]. + values are between [0, 1]. mc_sample_size : int Size of the monte carlo parameter set used to calculate the confidence interval - ci_avg_time_flag1 : bool - The confidence intervals differ each time step. Assumes the - temperature varies during the measurement period. Computes the - arithmic temporal mean. If you would like to know the confidence - interfal of: - (1) a single additional measurement. 
So you can state "if another - measurement were to be taken, it would have this ci" - (2) all measurements. So you can state "The temperature remained - during the entire measurement period between these ci bounds". - Adds "tmpw" + '_avg1' and "tmpw" + '_mc_avg1_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg1` are added to the DataStore. Works independently of the - ci_avg_time_flag2 and ci_avg_x_flag. - ci_avg_time_flag2 : bool - The confidence intervals differ each time step. Assumes the - temperature remains constant during the measurement period. - Computes the inverse-variance-weighted-temporal-mean temperature - and its uncertainty. - If you would like to know the confidence interfal of: - (1) I want to estimate a background temperature with confidence - intervals. I hereby assume the temperature does not change over - time and average all measurements to get a better estimate of the - background temperature. - Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg2` are added to the DataStore. Works independently of the - ci_avg_time_flag1 and ci_avg_x_flag. - ci_avg_time_sel : slice - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_time_isel : iterable of int - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_x_flag1 : bool - The confidence intervals differ at each location. Assumes the - temperature varies over `x` and over time. Computes the - arithmic spatial mean. If you would like to know the confidence - interfal of: - (1) a single additional measurement location. So you can state "if - another measurement location were to be taken, - it would have this ci" - (2) all measurement locations. So you can state "The temperature - along the fiber remained between these ci bounds". - Adds "tmpw" + '_avgx1' and "tmpw" + '_mc_avgx1_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avgx1` are added to the DataStore. Works independently of the - ci_avg_time_flag1, ci_avg_time_flag2 and ci_avg_x2_flag. - ci_avg_x_flag2 : bool - The confidence intervals differ at each location. Assumes the - temperature is the same at each location but varies over time. - Computes the inverse-variance-weighted-spatial-mean temperature - and its uncertainty. - If you would like to know the confidence interfal of: - (1) I have put a lot of fiber in water, and I know that the - temperature variation in the water is much smaller than along - other parts of the fiber. And I would like to average the - measurements from multiple locations to improve the estimated - temperature. - Adds "tmpw" + '_avg2' and "tmpw" + '_mc_avg2_var' to the - DataStore. If `conf_ints` are set, also the confidence intervals - `_mc_avg2` are added to the DataStore. Works independently of the - ci_avg_time_flag1 and ci_avg_x_flag. - ci_avg_x_sel : slice - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data - ci_avg_x_isel : iterable of int - Compute ci_avg_time_flag1 and ci_avg_time_flag2 using only a - selection of the data + var_only_sections : bool + useful if using the ci_avg_x_flag. Only calculates the var over the + sections, so that the values can be compared with accuracy along the + reference sections. Where the accuracy is the variance of the + residuals between the estimated temperature and temperature of the + water baths. 
da_random_state For testing purposes. Similar to random seed. The seed for dask. Makes random not so random. To produce reproducable results for @@ -3978,6 +3881,13 @@ def average_double_ended( Returns ------- + References + ---------- + .. [1] des Tombe, B., Schilperoort, B., & Bakker, M. (2020). Estimation + of Temperature and Associated Uncertainty from Fiber-Optic Raman- + Spectrum Distributed Temperature Sensing. Sensors, 20(8), 2235. + https://doi.org/10.3390/s20082235 + """ def create_da_ta2(no, i_splice, direction="fw", chunks=None): @@ -4011,335 +3921,320 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): ).rechunk((1, chunks[1], 1)) return arr - self.check_deprecated_kwargs(kwargs) + check_deprecated_kwargs(kwargs) + + if da_random_state: + # In testing environments + assert isinstance(da_random_state, da.random.RandomState) + state = da_random_state + else: + state = da.random.RandomState() + + if conf_ints: + assert "tmpw", ( + "Current implementation requires you to " + 'define "tmpw" when estimating confidence ' + "intervals" + ) + + no, nt = self.st.shape + nta = self.trans_att.size + npar = 1 + 2 * nt + no + nt * 2 * nta # number of parameters + + rsize = (mc_sample_size, no, nt) + + if reduce_memory_usage: + memchunk = da.ones( + (mc_sample_size, no, nt), chunks={0: -1, 1: 1, 2: "auto"} + ).chunks + else: + memchunk = da.ones( + (mc_sample_size, no, nt), chunks={0: -1, 1: "auto", 2: "auto"} + ).chunks + + self.coords["mc"] = range(mc_sample_size) + if conf_ints: + self.coords["CI"] = conf_ints + + assert isinstance(p_val, (str, np.ndarray, np.generic)) + if isinstance(p_val, str): + p_val = self[p_val].values + assert p_val.shape == (npar,), ( + "Did you set 'talpha' as " + "keyword argument of the " + "conf_int_double_ended() function?" + ) + + assert isinstance(p_cov, (str, np.ndarray, np.generic, bool)) + + if isinstance(p_cov, bool) and not p_cov: + # Exclude parameter uncertainty if p_cov == False + gamma = p_val[0] + d_fw = p_val[1 : nt + 1] + d_bw = p_val[1 + nt : 2 * nt + 1] + alpha = p_val[2 * nt + 1 : 2 * nt + 1 + no] + + self["gamma_mc"] = (tuple(), gamma) + self["alpha_mc"] = (("x",), alpha) + self["df_mc"] = (("time",), d_fw) + self["db_mc"] = (("time",), d_bw) + + if nta: + ta = p_val[2 * nt + 1 + no :].reshape((nt, 2, nta), order="F") + ta_fw = ta[:, 0, :] + ta_bw = ta[:, 1, :] + + ta_fw_arr = np.zeros((no, nt)) + for tai, taxi in zip(ta_fw.T, self.coords["trans_att"].values): + ta_fw_arr[self.x.values >= taxi] = ( + ta_fw_arr[self.x.values >= taxi] + tai + ) + + ta_bw_arr = np.zeros((no, nt)) + for tai, taxi in zip(ta_bw.T, self.coords["trans_att"].values): + ta_bw_arr[self.x.values < taxi] = ( + ta_bw_arr[self.x.values < taxi] + tai + ) + + self["talpha_fw_mc"] = (("x", "time"), ta_fw_arr) + self["talpha_bw_mc"] = (("x", "time"), ta_bw_arr) + + elif isinstance(p_cov, bool) and p_cov: + raise NotImplementedError("Not an implemented option. 
Check p_cov argument") + + else: + # WLS + if isinstance(p_cov, str): + p_cov = self[p_cov].values + assert p_cov.shape == (npar, npar) + + ix_sec = self.ufunc_per_section(x_indices=True, calc_per="all") + nx_sec = ix_sec.size + from_i = np.concatenate( + ( + np.arange(1 + 2 * nt), + 1 + 2 * nt + ix_sec, + np.arange(1 + 2 * nt + no, 1 + 2 * nt + no + nt * 2 * nta), + ) + ) + iox_sec1, iox_sec2 = np.meshgrid(from_i, from_i, indexing="ij") + po_val = p_val[from_i] + po_cov = p_cov[iox_sec1, iox_sec2] - if (ci_avg_x_flag1 or ci_avg_x_flag2) and ( - ci_avg_time_flag1 or ci_avg_time_flag2 - ): - raise NotImplementedError( - "Incompatible flags. Can not pick " "the right chunks" + po_mc = sst.multivariate_normal.rvs( + mean=po_val, cov=po_cov, size=mc_sample_size ) - elif not ( - ci_avg_x_flag1 or ci_avg_x_flag2 or ci_avg_time_flag1 or ci_avg_time_flag2 - ): - raise NotImplementedError("Pick one of the averaging options") + gamma = po_mc[:, 0] + d_fw = po_mc[:, 1 : nt + 1] + d_bw = po_mc[:, 1 + nt : 2 * nt + 1] - else: - pass + self["gamma_mc"] = (("mc",), gamma) + self["df_mc"] = (("mc", "time"), d_fw) + self["db_mc"] = (("mc", "time"), d_bw) - self.conf_int_double_ended( - p_val=p_val, - p_cov=p_cov, - st_var=st_var, - ast_var=ast_var, - rst_var=rst_var, - rast_var=rast_var, - conf_ints=None, - mc_sample_size=mc_sample_size, - da_random_state=da_random_state, - mc_remove_set_flag=False, - reduce_memory_usage=reduce_memory_usage, - **kwargs, - ) + # calculate alpha seperately + alpha = np.zeros((mc_sample_size, no), dtype=float) + alpha[:, ix_sec] = po_mc[:, 1 + 2 * nt : 1 + 2 * nt + nx_sec] - for label in ["tmpf", "tmpb"]: - if ci_avg_time_sel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - self.coords[time_dim2] = ( - (time_dim2,), - self["time"].sel(**{"time": ci_avg_time_sel}).data, - ) - self[label + "_avgsec"] = ( - ("x", time_dim2), - self[label].sel(**{"time": ci_avg_time_sel}).data, - ) - self[label + "_mc_set"] = ( - ("mc", "x", time_dim2), - self[label + "_mc_set"].sel(**{"time": ci_avg_time_sel}).data, - ) + not_ix_sec = np.array([i for i in range(no) if i not in ix_sec]) - elif ci_avg_time_isel is not None: - time_dim2 = "time" + "_avg" - x_dim2 = "x" - self.coords[time_dim2] = ( - (time_dim2,), - self["time"].isel(**{"time": ci_avg_time_isel}).data, - ) - self[label + "_avgsec"] = ( - ("x", time_dim2), - self[label].isel(**{"time": ci_avg_time_isel}).data, - ) - self[label + "_mc_set"] = ( - ("mc", "x", time_dim2), - self[label + "_mc_set"].isel(**{"time": ci_avg_time_isel}).data, - ) + if np.any(not_ix_sec): + not_alpha_val = p_val[2 * nt + 1 + not_ix_sec] + not_alpha_var = p_cov[2 * nt + 1 + not_ix_sec, 2 * nt + 1 + not_ix_sec] - elif ci_avg_x_sel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.sel(x=ci_avg_x_sel).data) - self[label + "_avgsec"] = ( - (x_dim2, "time"), - self[label].sel(x=ci_avg_x_sel).data, - ) - self[label + "_mc_set"] = ( - ("mc", x_dim2, "time"), - self[label + "_mc_set"].sel(x=ci_avg_x_sel).data, + not_alpha_mc = np.random.normal( + loc=not_alpha_val, + scale=not_alpha_var**0.5, + size=(mc_sample_size, not_alpha_val.size), ) - elif ci_avg_x_isel is not None: - time_dim2 = "time" - x_dim2 = "x_avg" - self.coords[x_dim2] = ((x_dim2,), self.x.isel(x=ci_avg_x_isel).data) - self[label + "_avgsec"] = ( - (x_dim2, time_dim2), - self[label].isel(x=ci_avg_x_isel).data, - ) - self[label + "_mc_set"] = ( - ("mc", x_dim2, time_dim2), - self[label + "_mc_set"].isel(x=ci_avg_x_isel).data, + alpha[:, not_ix_sec] 
= not_alpha_mc + + self["alpha_mc"] = (("mc", "x"), alpha) + + if nta: + ta = po_mc[:, 2 * nt + 1 + nx_sec :].reshape( + (mc_sample_size, nt, 2, nta), order="F" ) - else: - self[label + "_avgsec"] = self[label] - x_dim2 = "x" - time_dim2 = "time" + ta_fw = ta[:, :, 0, :] + ta_bw = ta[:, :, 1, :] - memchunk = self[label + "_mc_set"].chunks + ta_fw_arr = da.zeros( + (mc_sample_size, no, nt), chunks=memchunk, dtype=float + ) + for tai, taxi in zip( + ta_fw.swapaxes(0, 2), self.coords["trans_att"].values + ): + # iterate over the splices + i_splice = sum(self.x.values < taxi) + mask = create_da_ta2(no, i_splice, direction="fw", chunks=memchunk) - # subtract the mean temperature - q = self[label + "_mc_set"] - self[label + "_avgsec"] - self[label + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) + ta_fw_arr += mask * tai.T[:, None, :] - if ci_avg_x_flag1: - # unweighted mean - self[label + "_avgx1"] = self[label + "_avgsec"].mean(dim=x_dim2) + ta_bw_arr = da.zeros( + (mc_sample_size, no, nt), chunks=memchunk, dtype=float + ) + for tai, taxi in zip( + ta_bw.swapaxes(0, 2), self.coords["trans_att"].values + ): + i_splice = sum(self.x.values < taxi) + mask = create_da_ta2(no, i_splice, direction="bw", chunks=memchunk) - q = self[label + "_mc_set"] - self[label + "_avgsec"] - qvar = q.var(dim=["mc", x_dim2], ddof=1) - self[label + "_mc_avgx1_var"] = qvar + ta_bw_arr += mask * tai.T[:, None, :] - if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) - avg_axis = self[label + "_mc_set"].get_axis_num(["mc", x_dim2]) - q = self[label + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks, # - drop_axis=avg_axis, - # avg dimensions are dropped from input arr - new_axis=0, - ) # The new CI dim is added as firsaxis + self["talpha_fw_mc"] = (("mc", "x", "time"), ta_fw_arr) + self["talpha_bw_mc"] = (("mc", "x", "time"), ta_bw_arr) - self[label + "_mc_avgx1"] = (("CI", time_dim2), q) + # Draw from the normal distributions for the Stokes intensities + for k, st_labeli, st_vari in zip( + ["r_st", "r_ast", "r_rst", "r_rast"], + ["st", "ast", "rst", "rast"], + [st_var, ast_var, rst_var, rast_var], + ): + # Load the mean as chunked Dask array, otherwise eats memory + if type(self[st_labeli].data) == da.core.Array: + loc = da.asarray(self[st_labeli].data, chunks=memchunk[1:]) + else: + loc = da.from_array(self[st_labeli].data, chunks=memchunk[1:]) - if ci_avg_x_flag2: - q = self[label + "_mc_set"] - self[label + "_avgsec"] + # Make sure variance is of size (no, nt) + if np.size(st_vari) > 1: + if st_vari.shape == self[st_labeli].shape: + pass + else: + st_vari = np.broadcast_to(st_vari, (no, nt)) + else: + pass - qvar = q.var(dim=["mc"], ddof=1) + # Load variance as chunked Dask array, otherwise eats memory + if type(st_vari) == da.core.Array: + st_vari_da = da.asarray(st_vari, chunks=memchunk[1:]) - # Inverse-variance weighting - avg_x_var = 1 / (1 / qvar).sum(dim=x_dim2) + elif callable(st_vari) and type(self[st_labeli].data) == da.core.Array: + st_vari_da = da.asarray( + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) - self[label + "_mc_avgx2_var"] = avg_x_var + elif callable(st_vari) and type(self[st_labeli].data) != da.core.Array: + st_vari_da = da.from_array( + st_vari(self[st_labeli]).data, chunks=memchunk[1:] + ) - self[label + "_mc_avgx2_set"] = (self[label + "_mc_set"] / qvar).sum( - dim=x_dim2 - ) * avg_x_var - self[label + "_avgx2"] = self[label + "_mc_avgx2_set"].mean(dim="mc") + else: + st_vari_da = 
da.from_array(st_vari, chunks=memchunk[1:]) - if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[2]) - avg_axis_avgx = self[label + "_mc_set"].get_axis_num("mc") + self[k] = ( + ("mc", "x", "time"), + state.normal( + loc=loc, # has chunks=memchunk[1:] + scale=st_vari_da**0.5, + size=rsize, + chunks=memchunk, + ), + ) - qq = self[label + "_mc_avgx2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx), - chunks=new_chunks, # - drop_axis=avg_axis_avgx, - # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - self[label + "_mc_avgx2"] = (("CI", time_dim2), qq) + for label in ["tmpf", "tmpb"]: + if "tmpw" or label: + if label == "tmpf": + if nta: + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_st"] / self["r_ast"]) + + self["df_mc"] + + self["alpha_mc"] + + self["talpha_fw_mc"] + ) + - 273.15 + ) + else: + self["tmpf_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_st"] / self["r_ast"]) + + self["df_mc"] + + self["alpha_mc"] + ) + - 273.15 + ) + else: + if nta: + self["tmpb_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_rst"] / self["r_rast"]) + + self["db_mc"] + - self["alpha_mc"] + + self["talpha_bw_mc"] + ) + - 273.15 + ) + else: + self["tmpb_mc_set"] = ( + self["gamma_mc"] + / ( + np.log(self["r_rst"] / self["r_rast"]) + + self["db_mc"] + - self["alpha_mc"] + ) + - 273.15 + ) - if ci_avg_time_flag1 is not None: - # unweighted mean - self[label + "_avg1"] = self[label + "_avgsec"].mean(dim=time_dim2) + if var_only_sections: + # sets the values outside the reference sections to NaN + xi = self.ufunc_per_section(x_indices=True, calc_per="all") + x_mask_ = [True if ix in xi else False for ix in range(self.x.size)] + x_mask = np.reshape(x_mask_, (1, -1, 1)) + self[label + "_mc_set"] = self[label + "_mc_set"].where(x_mask) - q = self[label + "_mc_set"] - self[label + "_avgsec"] - qvar = q.var(dim=["mc", time_dim2], ddof=1) - self[label + "_mc_avg1_var"] = qvar + # subtract the mean temperature + q = self[label + "_mc_set"] - self[label] + self[label + "_mc_var"] = q.var(dim="mc", ddof=1) if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) - avg_axis = self[label + "_mc_set"].get_axis_num(["mc", time_dim2]) + new_chunks = list(self[label + "_mc_set"].chunks) + new_chunks[0] = (len(conf_ints),) + avg_axis = self[label + "_mc_set"].get_axis_num("mc") q = self[label + "_mc_set"].data.map_blocks( lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), chunks=new_chunks, # drop_axis=avg_axis, # avg dimensions are dropped from input arr new_axis=0, - ) # The new CI dim is added as firsaxis - - self[label + "_mc_avg1"] = (("CI", x_dim2), q) - - if ci_avg_time_flag2: - q = self[label + "_mc_set"] - self[label + "_avgsec"] - - qvar = q.var(dim=["mc"], ddof=1) - - # Inverse-variance weighting - avg_time_var = 1 / (1 / qvar).sum(dim=time_dim2) - - self[label + "_mc_avg2_var"] = avg_time_var - - self[label + "_mc_avg2_set"] = (self[label + "_mc_set"] / qvar).sum( - dim=time_dim2 - ) * avg_time_var - self[label + "_avg2"] = self[label + "_mc_avg2_set"].mean(dim="mc") - - if conf_ints: - new_chunks = (len(conf_ints), self[label + "_mc_set"].chunks[1]) - avg_axis_avg2 = self[label + "_mc_set"].get_axis_num("mc") + ) # The new CI dimension is added as firsaxis - qq = self[label + "_mc_avg2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), - chunks=new_chunks, # - drop_axis=avg_axis_avg2, 
- # avg dimensions are dropped from input arr - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # firsaxis - self[label + "_mc_avg2"] = (("CI", x_dim2), qq) + self[label + "_mc"] = (("CI", "x", "time"), q) # Weighted mean of the forward and backward - tmpw_var = 1 / ( - 1 / self["tmpf_mc" + "_avgsec_var"] + 1 / self["tmpb_mc" + "_avgsec_var"] - ) + tmpw_var = 1 / (1 / self["tmpf_mc_var"] + 1 / self["tmpb_mc_var"]) q = ( - self["tmpf_mc_set"] / self["tmpf_mc" + "_avgsec_var"] - + self["tmpb_mc_set"] / self["tmpb_mc" + "_avgsec_var"] + self["tmpf_mc_set"] / self["tmpf_mc_var"] + + self["tmpb_mc_set"] / self["tmpb_mc_var"] ) * tmpw_var self["tmpw" + "_mc_set"] = q # - # self["tmpw"] = self["tmpw" + '_mc_set'].mean(dim='mc') - self["tmpw" + "_avgsec"] = ( - self["tmpf_avgsec"] / self["tmpf_mc" + "_avgsec_var"] - + self["tmpb_avgsec"] / self["tmpb_mc" + "_avgsec_var"] + self["tmpw"] = ( + self["tmpf"] / self["tmpf_mc_var"] + self["tmpb"] / self["tmpb_mc_var"] ) * tmpw_var - q = self["tmpw" + "_mc_set"] - self["tmpw" + "_avgsec"] - self["tmpw" + "_mc" + "_avgsec_var"] = q.var(dim="mc", ddof=1) - - if ci_avg_time_flag1: - self["tmpw" + "_avg1"] = self["tmpw" + "_avgsec"].mean(dim=time_dim2) - - self["tmpw" + "_mc_avg1_var"] = self["tmpw" + "_mc_set"].var( - dim=["mc", time_dim2] - ) - - if conf_ints: - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", time_dim2]) - q2 = self["tmpw" + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # first axis - self["tmpw" + "_mc_avg1"] = (("CI", x_dim2), q2) - - if ci_avg_time_flag2: - tmpw_var_avg2 = 1 / ( - 1 / self["tmpf_mc_avg2_var"] + 1 / self["tmpb_mc_avg2_var"] - ) - - q = ( - self["tmpf_mc_avg2_set"] / self["tmpf_mc_avg2_var"] - + self["tmpb_mc_avg2_set"] / self["tmpb_mc_avg2_var"] - ) * tmpw_var_avg2 - - self["tmpw" + "_mc_avg2_set"] = q # - - self["tmpw" + "_avg2"] = ( - self["tmpf_avg2"] / self["tmpf_mc_avg2_var"] - + self["tmpb_avg2"] / self["tmpb_mc_avg2_var"] - ) * tmpw_var_avg2 - - self["tmpw" + "_mc_avg2_var"] = tmpw_var_avg2 - - if conf_ints: - # We first need to know the x-dim-chunk-size - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[1],) - avg_axis_avg2 = self["tmpw" + "_mc_avg2_set"].get_axis_num("mc") - q2 = self["tmpw" + "_mc_avg2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avg2), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis_avg2, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as firstax - self["tmpw" + "_mc_avg2"] = (("CI", x_dim2), q2) - - if ci_avg_x_flag1: - self["tmpw" + "_avgx1"] = self["tmpw" + "_avgsec"].mean(dim=x_dim2) - - self["tmpw" + "_mc_avgx1_var"] = self["tmpw" + "_mc_set"].var(dim=x_dim2) - - if conf_ints: - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis = self["tmpw" + "_mc_set"].get_axis_num(["mc", x_dim2]) - q2 = self["tmpw" + "_mc_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as - # first axis - self["tmpw" + "_mc_avgx1"] = (("CI", time_dim2), 
q2) - - if ci_avg_x_flag2: - tmpw_var_avgx2 = 1 / ( - 1 / self["tmpf_mc_avgx2_var"] + 1 / self["tmpb_mc_avgx2_var"] - ) - - q = ( - self["tmpf_mc_avgx2_set"] / self["tmpf_mc_avgx2_var"] - + self["tmpb_mc_avgx2_set"] / self["tmpb_mc_avgx2_var"] - ) * tmpw_var_avgx2 - - self["tmpw" + "_mc_avgx2_set"] = q # - - self["tmpw" + "_avgx2"] = ( - self["tmpf_avgx2"] / self["tmpf_mc_avgx2_var"] - + self["tmpb_avgx2"] / self["tmpb_mc_avgx2_var"] - ) * tmpw_var_avgx2 - - self["tmpw" + "_mc_avgx2_var"] = tmpw_var_avgx2 + q = self["tmpw" + "_mc_set"] - self["tmpw"] + self["tmpw" + "_mc_var"] = q.var(dim="mc", ddof=1) - if conf_ints: - # We first need to know the x-dim-chunk-size - new_chunks_weighted = ((len(conf_ints),),) + (memchunk[2],) - avg_axis_avgx2 = self["tmpw" + "_mc_avgx2_set"].get_axis_num("mc") - q2 = self["tmpw" + "_mc_avgx2_set"].data.map_blocks( - lambda x: np.percentile(x, q=conf_ints, axis=avg_axis_avgx2), - chunks=new_chunks_weighted, - # Explicitly define output chunks - drop_axis=avg_axis_avgx2, # avg dimensions are dropped - new_axis=0, - dtype=float, - ) # The new CI dimension is added as firstax - self["tmpw" + "_mc_avgx2"] = (("CI", time_dim2), q2) + # Calculate the CI of the weighted MC_set + if conf_ints: + new_chunks_weighted = ((len(conf_ints),),) + memchunk[1:] + avg_axis = self["tmpw" + "_mc_set"].get_axis_num("mc") + q2 = self["tmpw" + "_mc_set"].data.map_blocks( + lambda x: np.percentile(x, q=conf_ints, axis=avg_axis), + chunks=new_chunks_weighted, # Explicitly define output chunks + drop_axis=avg_axis, # avg dimensions are dropped + new_axis=0, + dtype=float, + ) # The new CI dimension is added as first axis + self["tmpw" + "_mc"] = (("CI", "x", "time"), q2) # Clean up the garbage. All arrays with a Monte Carlo dimension. if mc_remove_set_flag: @@ -4352,19 +4247,12 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): "alpha_mc", "df_mc", "db_mc", - "x_avg", - "time_avg", - "mc", ] for i in ["tmpf", "tmpb", "tmpw"]: - remove_mc_set.append(i + "_avgsec") remove_mc_set.append(i + "_mc_set") - remove_mc_set.append(i + "_mc_avg2_set") - remove_mc_set.append(i + "_mc_avgx2_set") - remove_mc_set.append(i + "_mc_avgsec_var") - if self.trans_att.size: + if nta: remove_mc_set.append('talpha"_fw_mc') remove_mc_set.append('talpha"_bw_mc') @@ -4373,6 +4261,52 @@ def create_da_ta2(no, i_splice, direction="fw", chunks=None): del self[k] pass + def in_confidence_interval(self, ci_label, conf_ints=None, sections=None): + """ + Returns an array with bools wether the temperature of the reference + sections are within the confidence intervals + + Parameters + ---------- + sections : Dict[str, List[slice]] + ci_label : str + The label of the data containing the confidence intervals. + conf_ints : Tuple + A tuple containing two floats between 0 and 1, representing the + levels between which the reference temperature should lay. 
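A minimal usage sketch of this check, assuming `ds` is a double-ended DataStore on which `conf_int_double_ended` has already been run with `conf_ints=[0.025, 0.975]`, so that "tmpw_mc" carries a CI dimension (names follow the code in this patch):

    mask = ds.in_confidence_interval("tmpw_mc", conf_ints=(0.025, 0.975))
    # `mask` is True where the reference-bath temperature lies between the
    # 2.5% and 97.5% bounds; x locations outside the reference sections
    # evaluate to False because their reference temperature is NaN.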
+ + Returns + ------- + + """ + if sections is None: + sections = self.sections + else: + sections = self.validate_sections(sections) + + if conf_ints is None: + conf_ints = self[ci_label].values + + assert len(conf_ints) == 2, "Please define conf_ints" + + tmp_dn = self[ci_label].sel(CI=conf_ints[0], method="nearest") + tmp_up = self[ci_label].sel(CI=conf_ints[1], method="nearest") + + ref = self.ufunc_per_section( + sections=sections, label="st", ref_temp_broadcasted=True, calc_per="all" + ) + ix_resid = self.ufunc_per_section( + sections=sections, x_indices=True, calc_per="all" + ) + ref_sorted = np.full(shape=tmp_dn.shape, fill_value=np.nan) + ref_sorted[ix_resid, :] = ref + ref_da = xr.DataArray(data=ref_sorted, coords=tmp_dn.coords) + + mask_dn = ref_da >= tmp_dn + mask_up = ref_da <= tmp_up + + return np.logical_and(mask_dn, mask_up) + def temperature_residuals(self, label=None, sections=None): """ Compute the temperature residuals, between the known temperature of the @@ -4572,4 +4506,7 @@ def ufunc_per_section( return out def resample_datastore(*args, **kwargs): - raise "ds.resample_datastore() is deprecated. Use from dtscalibration import DataStore; " "DataStore(ds.resample()) instead. See example notebook 2." + raise ( + "ds.resample_datastore() is deprecated. Use from dtscalibration import DataStore; " + "DataStore(ds.resample()) instead. See example notebook 2." + ) diff --git a/src/dtscalibration/datastore_utils.py b/src/dtscalibration/datastore_utils.py index 5afb4fa0..dc2daad3 100644 --- a/src/dtscalibration/datastore_utils.py +++ b/src/dtscalibration/datastore_utils.py @@ -55,1088 +55,1155 @@ def check_dims( ) -def get_netcdf_encoding( - ds: "DataStore", zlib: bool = True, complevel: int = 5, **kwargs -) -> dict: - """Get default netcdf compression parameters. The same for each data variable. +class ParameterIndexDoubleEnded: + """ + npar = 1 + 2 * nt + nx + 2 * nt * nta + assert pval.size == npar + assert p_var.size == npar + if calc_cov: + assert p_cov.shape == (npar, npar) + gamma = pval[0] + d_fw = pval[1:nt + 1] + d_bw = pval[1 + nt:1 + 2 * nt] + alpha = pval[1 + 2 * nt:1 + 2 * nt + nx] + # store calibration parameters in DataStore + self["gamma"] = (tuple(), gamma) + self["alpha"] = (('x',), alpha) + self["df"] = (('time',), d_fw) + self["db"] = (('time',), d_bw) + if nta > 0: + ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) + """ - TODO: Truncate precision to XML precision per data variable + def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): + self.nt = nt + self.nx = nx + self.nta = nta + self.fix_gamma = fix_gamma + self.fix_alpha = fix_alpha + @property + def all(self): + return np.concatenate( + (self.gamma, self.df, self.db, self.alpha, self.ta.flatten(order="F")) + ) - Parameters - ---------- - zlib - complevel - ds : DataStore + @property + def npar(self): + if not self.fix_gamma and not self.fix_alpha: + return 1 + 2 * self.nt + self.nx + 2 * self.nt * self.nta + elif self.fix_gamma and not self.fix_alpha: + return 2 * self.nt + self.nx + 2 * self.nt * self.nta + elif not self.fix_gamma and self.fix_alpha: + return 1 + 2 * self.nt + 2 * self.nt * self.nta + elif self.fix_gamma and self.fix_alpha: + return 2 * self.nt + 2 * self.nt * self.nta - Returns - ------- - encoding: - Encoding dictionary. 
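+    # Worked example with hypothetical sizes: nt=2, nx=5, nta=1 and nothing
+    # fixed gives npar = 1 + 2*2 + 5 + 2*2*1 = 14 entries in the parameter
+    # vector pval.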
- """ - comp = dict(zlib=zlib, complevel=complevel) - comp.update(kwargs) - encoding = {var: comp for var in ds.data_vars} + @property + def gamma(self): + if self.fix_gamma: + return [] + else: + return [0] - return encoding + @property + def df(self): + if self.fix_gamma: + return list(range(self.nt)) + else: + return list(range(1, self.nt + 1)) + @property + def db(self): + if self.fix_gamma: + return list(range(self.nt, 2 * self.nt)) + else: + return list(range(1 + self.nt, 1 + 2 * self.nt)) -def check_timestep_allclose(ds: "DataStore", eps: float = 0.01) -> None: - """ - Check if all timesteps are of equal size. For now it is not possible to calibrate - over timesteps if the acquisition time of timesteps varies, as the Stokes variance - would change over time. + @property + def alpha(self): + if self.fix_alpha: + return [] + elif self.fix_gamma: + return list(range(2 * self.nt, 1 + 2 * self.nt + self.nx)) + elif not self.fix_gamma: + return list(range(1 + 2 * self.nt, 1 + 2 * self.nt + self.nx)) - The acquisition time is stored for single ended measurements in userAcquisitionTime, - for double ended measurements in userAcquisitionTimeFW and userAcquisitionTimeBW. + @property + def ta(self): + if self.nta == 0: + return np.zeros((self.nt, 2, 0)) + elif not self.fix_gamma and not self.fix_alpha: + arr = np.arange(1 + 2 * self.nt + self.nx, self.npar) + elif self.fix_gamma and not self.fix_alpha: + arr = np.arange(2 * self.nt + self.nx, self.npar) + elif not self.fix_gamma and self.fix_alpha: + arr = np.arange(1 + 2 * self.nt, self.npar) + elif self.fix_gamma and self.fix_alpha: + arr = np.arange(2 * self.nt, self.npar) - Parameters - ---------- - ds : DataStore - eps : float - Default accepts 1% of relative variation between min and max acquisition time. + return arr.reshape((self.nt, 2, self.nta), order="F") - Returns - ------- + @property + def taf(self): + """ + Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. + ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') + self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) + """ + return self.ta[:, 0, :].flatten(order="C") - """ - dim = ds.channel_configuration["chfw"]["acquisitiontime_label"] - dt = ds[dim].data - dtmin = dt.min() - dtmax = dt.max() - dtavg = (dtmin + dtmax) / 2 - assert (dtmax - dtmin) / dtavg < eps, ( - "Acquisition time is Forward channel not equal for all " "time steps" - ) + @property + def tab(self): + """ + Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. 
+ ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') + self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) + """ + return self.ta[:, 1, :].flatten(order="C") - if ds.is_double_ended: - dim = ds.channel_configuration["chbw"]["acquisitiontime_label"] - dt = ds[dim].data - dtmin = dt.min() - dtmax = dt.max() - dtavg = (dtmin + dtmax) / 2 - assert (dtmax - dtmin) / dtavg < eps, ( - "Acquisition time Backward channel is not equal " "for all time steps" - ) + def get_ta_pars(self, pval): + if self.nta > 0: + if pval.ndim == 1: + return np.take_along_axis(pval[None, None], self.ta, axis=-1) + else: + # assume shape is (a, npar) and returns shape (nt, 2, nta, a) + assert pval.shape[1] == self.npar and pval.ndim == 2 + return np.stack([self.get_ta_pars(v) for v in pval], axis=-1) -def merge_double_ended( - ds_fw: "DataStore", - ds_bw: "DataStore", - cable_length: float, - plot_result: bool = True, - verbose: bool = True, -) -> "DataStore": - """ - Some measurements are not set up on the DTS-device as double-ended - meausurements. This means that the two channels have to be merged manually. + else: + return np.zeros(shape=(self.nt, 2, 0)) - This function can merge two single-ended DataStore objects into a single - double-ended DataStore. There is no interpolation, the two arrays are - flipped and overlayed, based on the entered cable length. This can - introduce spatial inaccuracies with extremely long cables. + def get_taf_pars(self, pval): + """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + return self.get_ta_pars(pval=pval)[:, 0, :] - Parameters - ---------- - ds_fw : DataSore object - DataStore object representing the forward measurement channel - ds_bw : DataSore object - DataStore object representing the backward measurement channel - cable_length : float - Manually estimated cable length to base alignment on - plot_result : bool - Plot the aligned Stokes of the forward and backward channels + def get_tab_pars(self, pval): + """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + return self.get_ta_pars(pval=pval)[:, 1, :] - Returns - ------- - ds : DataStore object - With the two channels merged - """ - assert ( - ds_fw.attrs["isDoubleEnded"] == "0" and ds_bw.attrs["isDoubleEnded"] == "0" - ), "(one of the) input DataStores is already double ended" + def get_taf_values(self, pval, x, trans_att, axis=""): + """returns taf parameters of shape (nx, nt)""" + pval = np.atleast_2d(pval) - ds_fw, ds_bw = merge_double_ended_times(ds_fw, ds_bw, verbose=verbose) + assert pval.ndim == 2 and pval.shape[1] == self.npar - ds = ds_fw.copy() - ds_bw = ds_bw.copy() + arr_out = np.zeros((self.nx, self.nt)) - ds_bw["x"] = cable_length - ds_bw["x"].values + if self.nta == 0: + pass - # TODO: check if reindexing matters, and should be used. 
- # one way to do it is performed below, but this could create artifacts - x_resolution = ds["x"].values[1] - ds["x"].values[0] - ds_bw = ds_bw.reindex( - {"x": ds["x"]}, method="nearest", tolerance=0.99 * x_resolution - ) + elif axis == "": + assert pval.shape[0] == 1 - ds_bw = ds_bw.sortby("x") + arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, 1, nt) - ds["rst"] = (["x", "time"], ds_bw["st"].values) - ds["rast"] = (["x", "time"], ds_bw["ast"].values) + for tai, taxi in zip(arr, trans_att): + arr_out[x >= taxi] += tai - ds = ds.dropna(dim="x") + elif axis == "x": + assert pval.shape[0] == self.nx - ds.attrs["isDoubleEnded"] = "1" - ds["userAcquisitionTimeBW"] = ("time", ds_bw["userAcquisitionTimeFW"].values) + arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, nx, nt) - if plot_result: - _, ax = plt.subplots() - ds["st"].isel(time=0).plot(ax=ax, label="Stokes forward") - ds["rst"].isel(time=0).plot(ax=ax, label="Stokes backward") - ax.legend() + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (x, t) + arr_out[x >= taxi] += tai[x >= taxi] - return ds + elif axis == "time": + assert pval.shape[0] == self.nt + # arr (nt, nta, nt) to have shape (nta, nt, nt) + arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) -def merge_double_ended_times( - ds_fw: "DataStore", - ds_bw: "DataStore", - verify_timedeltas: bool = True, - verbose: bool = True, -) -> tuple["DataStore", "DataStore"]: - """Helper for `merge_double_ended()` to deal with missing measurements. The - number of measurements of the forward and backward channels might get out - of sync if the device shuts down before the measurement of the last channel - is complete. This skips all measurements that are not accompanied by a partner - channel. + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (t, t) + arr_out[x >= taxi] += np.diag(tai)[None] - Provides little protection against swapping fw and bw. + return arr_out - If all measurements are recorded: fw_t0, bw_t0, fw_t1, bw_t1, fw_t2, bw_t2, .. - > all are passed - - If some are missing the accompanying measurement is skipped: - - fw_t0, bw_t0, bw_t1, fw_t2, bw_t2, .. > fw_t0, bw_t0, fw_t2, bw_t2, .. - - fw_t0, bw_t0, fw_t1, fw_t2, bw_t2, .. > fw_t0, bw_t0, fw_t2, bw_t2, .. - - fw_t0, bw_t0, bw_t1, fw_t2, fw_t3, bw_t3, .. > fw_t0, bw_t0, fw_t3, bw_t3, - - Mixing forward and backward channels can be problematic when there is a pause - after measuring all channels. This function is not perfect as the following - situation is not caught: - - fw_t0, bw_t0, fw_t1, bw_t2, fw_t3, bw_t3, .. - > fw_t0, bw_t0, fw_t1, bw_t2, fw_t3, bw_t3, .. - - This routine checks that the lowest channel - number is measured first (aka the forward channel), but it doesn't catch the - last case as it doesn't know that fw_t1 and bw_t2 belong to different cycles. 
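+    # get_taf_values and get_tab_values (below) broadcast the transient
+    # attenuation parameters onto an (nx, nt) grid: forward losses are added
+    # where x >= the trans_att location, backward losses where x < it, and
+    # contributions of multiple locations are summed.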
+ def get_tab_values(self, pval, x, trans_att, axis=""): + """returns tab parameters of shape (nx, nt)""" + assert pval.shape[-1] == self.npar - Parameters - ---------- - ds_fw : DataSore object - DataStore object representing the forward measurement channel - ds_bw : DataSore object - DataStore object representing the backward measurement channel - verify_timedeltas : bool - Check whether times between forward and backward measurements are similar to - those of neighboring measurements - verbose : bool - Print additional information to screen + arr_out = np.zeros((self.nx, self.nt)) - Returns - ------- - ds_fw_sel : DataSore object - DataStore object representing the forward measurement channel with - only times for which there is also a ds_bw measurement - ds_bw_sel : DataSore object - DataStore object representing the backward measurement channel with - only times for which there is also a ds_fw measurement - """ - if "forward channel" in ds_fw.attrs and "forward channel" in ds_bw.attrs: - assert ( - ds_fw.attrs["forward channel"] < ds_bw.attrs["forward channel"] - ), "ds_fw and ds_bw are swapped" - elif ( - "forwardMeasurementChannel" in ds_fw.attrs - and "forwardMeasurementChannel" in ds_bw.attrs - ): - assert ( - ds_fw.attrs["forwardMeasurementChannel"] - < ds_bw.attrs["forwardMeasurementChannel"] - ), "ds_fw and ds_bw are swapped" + if self.nta == 0: + pass - # Are all dt's within 1.5 seconds from one another? - if (ds_bw.time.size == ds_fw.time.size) and np.all( - ds_bw.time.values > ds_fw.time.values - ): - if verify_timedeltas: - dt_ori = (ds_bw.time.values - ds_fw.time.values) / np.array( - 1, dtype="timedelta64[s]" - ) - dt_all_close = np.allclose(dt_ori, dt_ori[0], atol=1.5, rtol=0.0) - else: - dt_all_close = True + elif axis == "": + pval = np.squeeze(pval) + assert pval.ndim == 1 + arr = np.transpose(self.get_tab_pars(pval), axes=(1, 0)) - if dt_all_close: - return ds_fw, ds_bw + for tai, taxi in zip(arr, trans_att): + arr_out[x < taxi] += tai - iuse_chfw = list() - iuse_chbw = list() + elif axis == "x": + assert pval.ndim == 2 and pval.shape[0] == self.nx - times_fw = {k: ("fw", i) for i, k in enumerate(ds_fw.time.values)} - times_bw = {k: ("bw", i) for i, k in enumerate(ds_bw.time.values)} - times_all = dict(sorted(({**times_fw, **times_bw}).items())) - times_all_val = list(times_all.values()) + arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0)) - for (direction, ind), (direction_next, ind_next) in zip( - times_all_val[:-1], times_all_val[1:] - ): - if direction == "fw" and direction_next == "bw": - iuse_chfw.append(ind) - iuse_chbw.append(ind_next) + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (x, t) + arr_out[x < taxi] += tai[x < taxi] - elif direction == "bw" and direction_next == "fw": - pass + elif axis == "time": + assert pval.ndim == 2 and pval.shape[0] == self.nt - elif direction == "fw" and direction_next == "fw": - if verbose: - print( - f"Missing backward measurement beween {ds_fw.time.values[ind]} and {ds_fw.time.values[ind_next]}" - ) + # arr (nt, nta, nt) to have shape (nta, nt, nt) + arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0)) - elif direction == "bw" and direction_next == "bw": - if verbose: - print( - f"Missing forward measurement beween {ds_bw.time.values[ind]} and {ds_bw.time.values[ind_next]}" - ) + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (t, t) + arr_out[x < taxi] += np.diag(tai) - # throw out is dt differs from its neighbors - if verify_timedeltas: - dt = ( - 
ds_bw.isel(time=iuse_chbw).time.values - - ds_fw.isel(time=iuse_chfw).time.values - ) / np.timedelta64(1, "s") - leaveout = np.zeros_like(dt, dtype=bool) - leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.0) * ~np.isclose( - dt[:-2], dt[1:-1], atol=1.5, rtol=0.0 - ) - iuse_chfw2 = np.array(iuse_chfw)[~leaveout] - iuse_chbw2 = np.array(iuse_chbw)[~leaveout] + return arr_out - if verbose: - for itfw, itbw in zip( - np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout] - ): - print( - "The following measurements do not belong together, as the time difference\n" - "between the\forward and backward measurements is more than 1.5 seconds\n" - "larger than the neighboring measurements.\n" - f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}" - ) - return ds_fw.isel(time=iuse_chfw2), ds_bw.isel(time=iuse_chbw2) - return ds_fw.isel(time=iuse_chfw), ds_bw.isel(time=iuse_chbw) +class ParameterIndexSingleEnded: + """ + if parameter fixed, they are not in + npar = 1 + 1 + nt + nta * nt + """ + def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True): + assert not ( + includes_alpha and includes_dalpha + ), "Cannot hold both dalpha and alpha" + self.nt = nt + self.nx = nx + self.nta = nta + self.includes_alpha = includes_alpha + self.includes_dalpha = includes_dalpha -def shift_double_ended( - ds: "DataStore", i_shift: int, verbose: bool = True -) -> "DataStore": - """ - The cable length was initially configured during the DTS measurement. For double ended - measurements it is important to enter the correct length so that the forward channel and the - backward channel are aligned. + @property + def all(self): + return np.concatenate( + (self.gamma, self.dalpha, self.alpha, self.c, self.ta.flatten(order="F")) + ) - This function can be used to shift the backward channel so that the forward channel and the - backward channel are aligned. The backward channel is shifted per index - along the x dimension. + @property + def npar(self): + if self.includes_alpha: + return 1 + self.nx + self.nt + self.nta * self.nt + elif self.includes_dalpha: + return 1 + 1 + self.nt + self.nta * self.nt + else: + return 1 + self.nt + self.nta * self.nt - The DataStore object that is returned only has values for the backscatter that are measured - for both the forward channel and the backward channel in the shifted object + @property + def gamma(self): + return [0] - There is no interpolation, as this would alter the accuracy. + @property + def dalpha(self): + if self.includes_dalpha: + return [1] + else: + return [] + @property + def alpha(self): + if self.includes_alpha: + return list(range(1, 1 + self.nx)) + else: + return [] - Parameters - ---------- - ds : DataSore object - DataStore object that needs to be shifted - i_shift : int - if i_shift < 0, the cable was configured to be too long and there is too much data - recorded. If i_shift > 0, the cable was configured to be too short and part of the cable is - not measured - verbose: bool - If True, the function will inform the user which variables are - dropped. If False, the function will silently drop the variables. 
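+    # Note: the per-time intercept ``c`` (next property) comes after gamma and
+    # alpha/dalpha in the parameter vector, so its offset depends on which of
+    # those two is included.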
+ @property + def c(self): + if self.includes_alpha: + return list(range(1 + self.nx, 1 + self.nx + self.nt)) + elif self.includes_dalpha: + return list(range(1 + 1, 1 + 1 + self.nt)) + else: + return list(range(1, 1 + self.nt)) - Returns - ------- - ds2 : DataStore object - With a shifted x-axis - """ - from dtscalibration import DataStore + @property + def taf(self): + """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" + # ta = p_val[-nt * nta:].reshape((nt, nta), order='F') + # self["talpha"] = (('time', 'trans_att'), ta[:, :]) + if self.includes_alpha: + return np.arange( + 1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta + ).reshape((self.nt, self.nta), order="F") + elif self.includes_dalpha: + return np.arange( + 1 + 1 + self.nt, 1 + 1 + self.nt + self.nt * self.nta + ).reshape((self.nt, self.nta), order="F") + else: + return np.arange(1 + self.nt, 1 + self.nt + self.nt * self.nta).reshape( + (self.nt, self.nta), order="F" + ) - assert isinstance(i_shift, (int, np.integer)) + def get_taf_pars(self, pval): + if self.nta > 0: + if pval.ndim == 1: + # returns shape (nta, nt) + assert len(pval) == self.npar, "Length of pval is incorrect" + return np.stack([pval[tafi] for tafi in self.taf.T]) - nx = ds.x.size - nx2 = nx - i_shift - if i_shift < 0: - # The cable was configured to be too long. - # There is too much data recorded. - st = ds.st.data[:i_shift] - ast = ds.ast.data[:i_shift] - rst = ds.rst.data[-i_shift:] - rast = ds.rast.data[-i_shift:] - x2 = ds.x.data[:i_shift] - # TMP2 = ds.tmp.data[:i_shift] + else: + # assume shape is (a, npar) and returns shape (nta, nt, a) + assert pval.shape[1] == self.npar and pval.ndim == 2 + return np.stack([self.get_taf_pars(v) for v in pval], axis=-1) - else: - # The cable was configured to be too short. - # Part of the cable is not measured. 
- st = ds["st"].data[i_shift:] - ast = ds["ast"].data[i_shift:] - rst = ds["rst"].data[:nx2] - rast = ds["rast"].data[:nx2] - x2 = ds["x"].data[i_shift:] - # TMP2 = ds.tmp.data[i_shift:] + else: + return np.zeros(shape=(self.nt, 0)) - d2_coords = dict(ds.coords) - d2_coords["x"] = xr.DataArray(data=x2, dims=("x",), attrs=ds["x"].attrs) + def get_taf_values(self, pval, x, trans_att, axis=""): + """returns taf parameters of shape (nx, nt)""" + # assert pval.ndim == 2 and pval.shape[1] == self.npar - d2_data = dict(ds.data_vars) - for k in ds.data_vars: - if "x" in ds[k].dims and k in d2_data: - del d2_data[k] + arr_out = np.zeros((self.nx, self.nt)) - new_data = (("st", st), ("ast", ast), ("rst", rst), ("rast", rast)) + if self.nta == 0: + pass - for k, v in new_data: - d2_data[k] = xr.DataArray(data=v, dims=ds[k].dims, attrs=ds[k].attrs) + elif axis == "": + pval = pval.flatten() + assert pval.shape == (self.npar,) + arr = self.get_taf_pars(pval) + assert arr.shape == ( + self.nta, + self.nt, + ) - not_included = [k for k in ds.data_vars if k not in d2_data] - if not_included and verbose: - print("I dont know what to do with the following data", not_included) + for tai, taxi in zip(arr, trans_att): + arr_out[x >= taxi] += tai - return DataStore(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs) + elif axis == "x": + assert pval.shape == (self.nx, self.npar) + arr = self.get_taf_pars(pval) + assert arr.shape == (self.nta, self.nx, self.nt) + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (x, t) + arr_out[x >= taxi] += tai[x >= taxi] -def suggest_cable_shift_double_ended( - ds: "DataStore", - irange: npt.NDArray[np.int_], - plot_result: bool = True, - **fig_kwargs, -) -> tuple[int, int]: - """The cable length was initially configured during the DTS measurement. - For double ended measurements it is important to enter the correct length - so that the forward channel and the backward channel are aligned. + elif axis == "time": + assert pval.shape == (self.nt, self.npar) + arr = self.get_taf_pars(pval) + assert arr.shape == (self.nta, self.nt, self.nt) - This function can be used to find the shift of the backward channel so - that the forward channel and the backward channel are aligned. The shift - index refers to the x-dimension. + for tai, taxi in zip(arr, trans_att): + # loop over nta, tai has shape (t, t) + arr_out[x >= taxi] += np.diag(tai)[None] - The attenuation should be approximately a straight line with jumps at the - splices. Temperature independent and invariant over time. The following - objective functions seems to do the job at determining the best shift for - which the attenuation is most straight. + return arr_out - Err1 sums the first derivative. Is a proxy for the length of the - attenuation line. Err2 sums the second derivative. Is a proxy for the - wiggelyness of the line. - The top plot shows the origional Stokes and the origional and shifted - anti-Stokes The bottom plot is generated that shows the two objective - functions +def check_deprecated_kwargs(kwargs): + """ + Internal function that parses the `kwargs` for depreciated keyword + arguments. + Depreciated keywords raise an error, pending to be depreciated do not. + But this requires that the code currently deals with those arguments. Parameters ---------- - ds : DataSore object - DataStore object that needs to be shifted - irange : array-like - a numpy array with data of type int. Containing all the shift index - that are tested. - Example: np.arange(-250, 200, 1, dtype=int). 
It shifts the return - scattering with 250 indices. Calculates err1 and err2. Then shifts - the return scattering with 249 indices. Calculates err1 and err2. The - lowest err1 and err2 are suggested as best shift options. - plot_result : bool - Plot the summed error as a function of the shift. + kwargs : Dict + A dictionary with keyword arguments. Returns ------- - ishift1: int - Suggested shift based on Err1 - ishift2: int - Suggested shift based on Err2 + """ - err1 = [] - err2 = [] + msg = """Previously, it was possible to manually set the label from + which the Stokes and anti-Stokes were read within the DataStore + object. To reduce the clutter in the code base and be able to + maintain it, this option was removed. + See: https://github.com/dtscalibration/python-dts-calibration/issues/81 + + The new **fixed** names are: st, ast, rst, rast. + + It is still possible to use the previous defaults, for example when + reading stored measurements from netCDF, by renaming the labels. The + old default labels were ST, AST, REV-ST, REV-AST. + + ``` + ds = open_datastore(path_to_old_file) + ds = ds.rename_labels() + ds.calibration_double_ended( + st_var=1.5, + ast_var=1.5, + rst_var=1., + rast_var=1., + method='wls') + ``` + + ds.tmpw.plot() + """ + list_of_depr = [ + "st_label", + "ast_label", + "rst_label", + "rast_label", + "transient_asym_att_x", + "transient_att_x", + ] + list_of_pending_depr = [] + + kwargs = {k: v for k, v in kwargs.items() if k not in list_of_pending_depr} + + for k in kwargs: + if k in list_of_depr: + raise NotImplementedError(msg) + + if len(kwargs) != 0: + raise NotImplementedError( + "The following keywords are not " + "supported: " + ", ".join(kwargs.keys()) + ) - for shift in irange: - i_shift = int(shift) # int() because iterating irange does not narrow type. - nx = ds["x"].size - nx2 = nx - i_shift - if i_shift < 0: - # The cable was configured to be too long. There is too much data recorded. - st = ds["st"].data[:i_shift] - ast = ds["ast"].data[:i_shift] - rst = ds["rst"].data[-i_shift:] - rast = ds["rast"].data[-i_shift:] - x2 = ds["x"].data[:i_shift] - else: - # The cable was configured to be too short. Part of the cable is not measured. - st = ds["st"].data[i_shift:] - ast = ds["ast"].data[i_shift:] - rst = ds["rst"].data[:nx2] - rast = ds["rast"].data[:nx2] - x2 = ds["x"].data[i_shift:] + pass - i_f = np.log(st / ast) - i_b = np.log(rst / rast) - att = (i_b - i_f) / 2 # varianble E in article +def check_timestep_allclose(ds: "DataStore", eps: float = 0.01) -> None: + """ + Check if all timesteps are of equal size. For now it is not possible to calibrate + over timesteps if the acquisition time of timesteps varies, as the Stokes variance + would change over time. - att_dif1 = np.diff(att, n=1, axis=0) - att_x_dif1 = 0.5 * x2[1:] + 0.5 * x2[:-1] - err1_mask = np.logical_and(att_x_dif1 > 1.0, att_x_dif1 < 150.0) - err1.append(np.nansum(np.abs(att_dif1[err1_mask]))) + The acquisition time is stored for single ended measurements in userAcquisitionTime, + for double ended measurements in userAcquisitionTimeFW and userAcquisitionTimeBW. - att_dif2 = np.diff(att, n=2, axis=0) - att_x_dif2 = x2[1:-1] - err2_mask = np.logical_and(att_x_dif2 > 1.0, att_x_dif2 < 150.0) - err2.append(np.nansum(np.abs(att_dif2[err2_mask]))) + Parameters + ---------- + ds : DataStore + eps : float + Default accepts 1% of relative variation between min and max acquisition time. - # int() is required for typing. 
- ishift1 = int(irange[np.argmin(err1, axis=0)]) - ishift2 = int(irange[np.argmin(err2, axis=0)]) + Returns + ------- - if plot_result: - if fig_kwargs is None: - fig_kwargs = {} + """ + dim = ds.channel_configuration["chfw"]["acquisitiontime_label"] + dt = ds[dim].data + dtmin = dt.min() + dtmax = dt.max() + dtavg = (dtmin + dtmax) / 2 + assert (dtmax - dtmin) / dtavg < eps, ( + "Acquisition time is Forward channel not equal for all " "time steps" + ) - f, (ax0, ax1) = plt.subplots(2, 1, sharex=False, **fig_kwargs) - f.suptitle(f"best shift is {ishift1} or {ishift2}") + if ds.is_double_ended: + dim = ds.channel_configuration["chbw"]["acquisitiontime_label"] + dt = ds[dim].data + dtmin = dt.min() + dtmax = dt.max() + dtavg = (dtmin + dtmax) / 2 + assert (dtmax - dtmin) / dtavg < eps, ( + "Acquisition time Backward channel is not equal " "for all time steps" + ) - dt = ds.isel(time=0) - x = dt["x"].data - y = dt["st"].data - ax0.plot(x, y, label="ST original") - y = dt["rst"].data - ax0.plot(x, y, label="REV-ST original") - dtsh1 = shift_double_ended(dt, ishift1) - dtsh2 = shift_double_ended(dt, ishift2) - x1 = dtsh1["x"].data - x2 = dtsh2["x"].data - y1 = dtsh1["rst"].data - y2 = dtsh2["rst"].data - ax0.plot(x1, y1, label=f"ST i_shift={ishift1}") - ax0.plot(x2, y2, label=f"ST i_shift={ishift2}") - ax0.set_xlabel("x (m)") - ax0.legend() +def get_netcdf_encoding( + ds: "DataStore", zlib: bool = True, complevel: int = 5, **kwargs +) -> dict: + """Get default netcdf compression parameters. The same for each data variable. - ax2 = ax1.twinx() - ax1.plot(irange, err1, c="red", label="1 deriv") - ax2.plot(irange, err2, c="blue", label="2 deriv") - ax1.axvline( - ishift1, c="red", linewidth=0.8, label=f"1 deriv. i_shift={ishift1}" - ) - ax2.axvline( - ishift2, c="blue", linewidth=0.8, label=f"2 deriv. i_shift={ishift1}" - ) - ax1.set_xlabel("i_shift") - ax1.legend(loc=2) # left axis - ax2.legend(loc=1) # right axis + TODO: Truncate precision to XML precision per data variable - plt.tight_layout() - return ishift1, ishift2 + Parameters + ---------- + zlib + complevel + ds : DataStore + Returns + ------- + encoding: + Encoding dictionary. + """ + comp = dict(zlib=zlib, complevel=complevel) + comp.update(kwargs) + encoding = {var: comp for var in ds.data_vars} -def ufunc_per_section_helper( - sections=None, - func=None, - x_coords=None, - reference_dataset=None, - dataarray=None, - subtract_from_dataarray=None, - subtract_reference_from_dataarray=False, - ref_temp_broadcasted=False, - calc_per="stretch", - **func_kwargs, -): + return encoding + + +def merge_double_ended( + ds_fw: "DataStore", + ds_bw: "DataStore", + cable_length: float, + plot_result: bool = True, + verbose: bool = True, +) -> "DataStore": """ - User function applied to parts of the cable. Super useful, - many options and slightly - complicated. + Some measurements are not set up on the DTS-device as double-ended + meausurements. This means that the two channels have to be merged manually. - The function `func` is taken over all the timesteps and calculated - per `calc_per`. This - is returned as a dictionary + This function can merge two single-ended DataStore objects into a single + double-ended DataStore. There is no interpolation, the two arrays are + flipped and overlayed, based on the entered cable length. This can + introduce spatial inaccuracies with extremely long cables. Parameters ---------- - sections : Dict[str, List[slice]], optional - If `None` is supplied, `ds.sections` is used. Define calibration - sections. 
Each section requires a reference temperature time series, - such as the temperature measured by an external temperature sensor. - They should already be part of the DataStore object. `sections` - is defined with a dictionary with its keywords of the - names of the reference temperature time series. Its values are - lists of slice objects, where each slice object is a fiber stretch - that has the reference temperature. Afterwards, `sections` is stored - under `ds.sections`. - func : callable, str - A numpy function, or lambda function to apply to each 'calc_per'. - x_coords : xarray.DataArray, optional - x-coordinates, stored as ds.x. If supplied, returns the x-indices of - the reference sections. - reference_dataset : xarray.Dataset or Dict, optional - Contains the reference temperature timeseries refered to in `sections`. - Not required if `x_indices`. - dataarray : xarray.DataArray, optional - Pass your DataArray of which you want to compute the statistics. Has an - (x,) dimension or (x, time) dimensions. - subtract_from_dataarray : xarray.DataArray, optional - Pass your DataArray of which you want to subtract from `dataarray` before you - compute the statistics. Has an (x,) dimension or (x, time) dimensions. - subtract_reference_from_dataarray : bool - If True the reference temperature according to sections is subtracted from - dataarray before computing statistics - ref_temp_broadcasted : bool - Use if you want to return the reference temperature of shape of the reference - sections - calc_per : {'all', 'section', 'stretch'} - func_kwargs : dict - Dictionary with options that are passed to func + ds_fw : DataSore object + DataStore object representing the forward measurement channel + ds_bw : DataSore object + DataStore object representing the backward measurement channel + cable_length : float + Manually estimated cable length to base alignment on + plot_result : bool + Plot the aligned Stokes of the forward and backward channels + verbose : bool Returns ------- + ds : DataStore object + With the two channels merged + """ + assert ( + ds_fw.attrs["isDoubleEnded"] == "0" and ds_bw.attrs["isDoubleEnded"] == "0" + ), "(one of the) input DataStores is already double ended" - Examples - -------- + ds_fw, ds_bw = merge_double_ended_times(ds_fw, ds_bw, verbose=verbose) - 1. Calculate the variance of the residuals in the along ALL the\ - reference sections wrt the temperature of the water baths + ds = ds_fw.copy() + ds_bw = ds_bw.copy() - >>> tmpf_var = ufunc_per_section_helper( - >>> func='var', - >>> calc_per='all', - >>> dataarray=d['tmpf'], - >>> subtract_reference_from_dataarray=True) + ds_bw["x"] = cable_length - ds_bw["x"].values - 2. Calculate the variance of the residuals in the along PER\ - reference section wrt the temperature of the water baths + # TODO: check if reindexing matters, and should be used. + # one way to do it is performed below, but this could create artifacts + x_resolution = ds["x"].values[1] - ds["x"].values[0] + ds_bw = ds_bw.reindex( + {"x": ds["x"]}, method="nearest", tolerance=0.99 * x_resolution + ) - >>> tmpf_var = ufunc_per_section_helper - >>> func='var', - >>> calc_per='stretch', - >>> dataarray=d['tmpf'], - >>> subtract_reference_from_dataarray=True) + ds_bw = ds_bw.sortby("x") - 3. 
Calculate the variance of the residuals in the along PER\ - water bath wrt the temperature of the water baths + ds["rst"] = (["x", "time"], ds_bw["st"].values) + ds["rast"] = (["x", "time"], ds_bw["ast"].values) - >>> tmpf_var = ufunc_per_section_helper( - >>> func='var', - >>> calc_per='section', - >>> dataarray=d['tmpf'], - >>> subtract_reference_from_dataarray=True) + ds = ds.dropna(dim="x") - 4. Obtain the coordinates of the measurements per section + ds.attrs["isDoubleEnded"] = "1" + ds["userAcquisitionTimeBW"] = ("time", ds_bw["userAcquisitionTimeFW"].values) - >>> locs = ufunc_per_section_helper( - >>> func=None, - >>> dataarray=d.x, - >>> subtract_reference_from_dataarray=False, - >>> ref_temp_broadcasted=False, - >>> calc_per='stretch') + if plot_result: + _, ax = plt.subplots() + ds["st"].isel(time=0).plot(ax=ax, label="Stokes forward") + ds["rst"].isel(time=0).plot(ax=ax, label="Stokes backward") + ax.legend() - 5. Number of observations per stretch + return ds - >>> nlocs = ufunc_per_section_helper( - >>> func=len, - >>> dataarray=d.x, - >>> subtract_reference_from_dataarray=False, - >>> ref_temp_broadcasted=False, - >>> calc_per='stretch') - 6. broadcast the temperature of the reference sections to\ - stretch/section/all dimensions. The value of the reference\ - temperature (a timeseries) is broadcasted to the shape of self[\ - label]. The dataarray is not used for anything else. +def merge_double_ended_times( + ds_fw: "DataStore", + ds_bw: "DataStore", + verify_timedeltas: bool = True, + verbose: bool = True, +) -> tuple["DataStore", "DataStore"]: + """Helper for `merge_double_ended()` to deal with missing measurements. The + number of measurements of the forward and backward channels might get out + of sync if the device shuts down before the measurement of the last channel + is complete. This skips all measurements that are not accompanied by a partner + channel. - >>> temp_ref = ufunc_per_section_helper( - >>> dataarray=d["st"], - >>> ref_temp_broadcasted=True, - >>> calc_per='all') + Provides little protection against swapping fw and bw. - 7. x-coordinate index + If all measurements are recorded: fw_t0, bw_t0, fw_t1, bw_t1, fw_t2, bw_t2, .. + > all are passed - >>> ix_loc = ufunc_per_section_helperx_coords=d.x) + If some are missing the accompanying measurement is skipped: + - fw_t0, bw_t0, bw_t1, fw_t2, bw_t2, .. > fw_t0, bw_t0, fw_t2, bw_t2, .. + - fw_t0, bw_t0, fw_t1, fw_t2, bw_t2, .. > fw_t0, bw_t0, fw_t2, bw_t2, .. + - fw_t0, bw_t0, bw_t1, fw_t2, fw_t3, bw_t3, .. > fw_t0, bw_t0, fw_t3, bw_t3, + + Mixing forward and backward channels can be problematic when there is a pause + after measuring all channels. This function is not perfect as the following + situation is not caught: + - fw_t0, bw_t0, fw_t1, bw_t2, fw_t3, bw_t3, .. + > fw_t0, bw_t0, fw_t1, bw_t2, fw_t3, bw_t3, .. + This routine checks that the lowest channel + number is measured first (aka the forward channel), but it doesn't catch the + last case as it doesn't know that fw_t1 and bw_t2 belong to different cycles. 
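+
+    A minimal usage sketch, mirroring how ``merge_double_ended`` calls this
+    helper::
+
+        ds_fw, ds_bw = merge_double_ended_times(ds_fw, ds_bw, verbose=True)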
- Note - ---- - If `dataarray` or `subtract_from_dataarray` is a Dask array, a Dask - array is returned else a numpy array is returned + Parameters + ---------- + ds_fw : DataSore object + DataStore object representing the forward measurement channel + ds_bw : DataSore object + DataStore object representing the backward measurement channel + verify_timedeltas : bool + Check whether times between forward and backward measurements are similar to + those of neighboring measurements + verbose : bool + Print additional information to screen + + Returns + ------- + ds_fw_sel : DataSore object + DataStore object representing the forward measurement channel with + only times for which there is also a ds_bw measurement + ds_bw_sel : DataSore object + DataStore object representing the backward measurement channel with + only times for which there is also a ds_fw measurement """ - if not func: + if "forward channel" in ds_fw.attrs and "forward channel" in ds_bw.attrs: + assert ( + ds_fw.attrs["forward channel"] < ds_bw.attrs["forward channel"] + ), "ds_fw and ds_bw are swapped" + elif ( + "forwardMeasurementChannel" in ds_fw.attrs + and "forwardMeasurementChannel" in ds_bw.attrs + ): + assert ( + ds_fw.attrs["forwardMeasurementChannel"] + < ds_bw.attrs["forwardMeasurementChannel"] + ), "ds_fw and ds_bw are swapped" - def func(a): - """ + # Are all dt's within 1.5 seconds from one another? + if (ds_bw.time.size == ds_fw.time.size) and np.all( + ds_bw.time.values > ds_fw.time.values + ): + if verify_timedeltas: + dt_ori = (ds_bw.time.values - ds_fw.time.values) / np.array( + 1, dtype="timedelta64[s]" + ) + dt_all_close = np.allclose(dt_ori, dt_ori[0], atol=1.5, rtol=0.0) + else: + dt_all_close = True - Parameters - ---------- - a + if dt_all_close: + return ds_fw, ds_bw - Returns - ------- + iuse_chfw = list() + iuse_chbw = list() - """ - return a + times_fw = {k: ("fw", i) for i, k in enumerate(ds_fw.time.values)} + times_bw = {k: ("bw", i) for i, k in enumerate(ds_bw.time.values)} + times_all = dict(sorted(({**times_fw, **times_bw}).items())) + times_all_val = list(times_all.values()) + + for (direction, ind), (direction_next, ind_next) in zip( + times_all_val[:-1], times_all_val[1:] + ): + if direction == "fw" and direction_next == "bw": + iuse_chfw.append(ind) + iuse_chbw.append(ind_next) + + elif direction == "bw" and direction_next == "fw": + pass + + elif direction == "fw" and direction_next == "fw": + if verbose: + print( + f"Missing backward measurement beween {ds_fw.time.values[ind]} and {ds_fw.time.values[ind_next]}" + ) + + elif direction == "bw" and direction_next == "bw": + if verbose: + print( + f"Missing forward measurement beween {ds_bw.time.values[ind]} and {ds_bw.time.values[ind_next]}" + ) + + # throw out is dt differs from its neighbors + if verify_timedeltas: + dt = ( + ds_bw.isel(time=iuse_chbw).time.values + - ds_fw.isel(time=iuse_chfw).time.values + ) / np.timedelta64(1, "s") + leaveout = np.zeros_like(dt, dtype=bool) + leaveout[1:-1] = np.isclose(dt[:-2], dt[2:], atol=1.5, rtol=0.0) * ~np.isclose( + dt[:-2], dt[1:-1], atol=1.5, rtol=0.0 + ) + iuse_chfw2 = np.array(iuse_chfw)[~leaveout] + iuse_chbw2 = np.array(iuse_chbw)[~leaveout] - elif isinstance(func, str) and func == "var": + if verbose: + for itfw, itbw in zip( + np.array(iuse_chfw)[leaveout], np.array(iuse_chbw)[leaveout] + ): + print( + "The following measurements do not belong together, as the time difference\n" + "between the\forward and backward measurements is more than 1.5 seconds\n" + "larger than the neighboring 
measurements.\n" + f"FW: {ds_fw.isel(time=itfw).time.values} and BW: {ds_bw.isel(time=itbw).time.values}" + ) + return ds_fw.isel(time=iuse_chfw2), ds_bw.isel(time=iuse_chbw2) - def func(a): - """ + return ds_fw.isel(time=iuse_chfw), ds_bw.isel(time=iuse_chbw) - Parameters - ---------- - a - Returns - ------- +def shift_double_ended( + ds: "DataStore", i_shift: int, verbose: bool = True +) -> "DataStore": + """ + The cable length was initially configured during the DTS measurement. For double ended + measurements it is important to enter the correct length so that the forward channel and the + backward channel are aligned. - """ - return np.var(a, ddof=1) + This function can be used to shift the backward channel so that the forward channel and the + backward channel are aligned. The backward channel is shifted per index + along the x dimension. - else: - assert callable(func) + The DataStore object that is returned only has values for the backscatter that are measured + for both the forward channel and the backward channel in the shifted object - assert calc_per in ["all", "section", "stretch"] + There is no interpolation, as this would alter the accuracy. - if x_coords is None and ( - (dataarray is not None and hasattr(dataarray.data, "chunks")) - or (subtract_from_dataarray and hasattr(subtract_from_dataarray.data, "chunks")) - ): - concat = da.concatenate - else: - concat = np.concatenate - out = dict() + Parameters + ---------- + ds : DataSore object + DataStore object that needs to be shifted + i_shift : int + if i_shift < 0, the cable was configured to be too long and there is too much data + recorded. If i_shift > 0, the cable was configured to be too short and part of the cable is + not measured + verbose: bool + If True, the function will inform the user which variables are + dropped. If False, the function will silently drop the variables. - for k, section in sections.items(): - out[k] = [] - for stretch in section: - if x_coords is not None: - # get indices from stretches - assert subtract_from_dataarray is None - assert not subtract_reference_from_dataarray - assert not ref_temp_broadcasted - # so it is slicable with x-indices - _x_indices = x_coords.astype(int) * 0 + np.arange(x_coords.size) - arg1 = _x_indices.sel(x=stretch).data - out[k].append(arg1) + Returns + ------- + ds2 : DataStore object + With a shifted x-axis + """ + from dtscalibration import DataStore - elif ( - subtract_from_dataarray is not None - and not subtract_reference_from_dataarray - and not ref_temp_broadcasted - ): - # calculate std wrt other series - arg1 = dataarray.sel(x=stretch).data - arg2 = subtract_from_dataarray.sel(x=stretch).data - out[k].append(arg1 - arg2) + assert isinstance(i_shift, (int, np.integer)) - elif ( - subtract_from_dataarray is None - and subtract_reference_from_dataarray - and not ref_temp_broadcasted - ): - # calculate std wrt reference temperature of the corresponding bath - arg1 = dataarray.sel(x=stretch).data - arg2 = reference_dataset[k].data - out[k].append(arg1 - arg2) + nx = ds.x.size + nx2 = nx - i_shift + if i_shift < 0: + # The cable was configured to be too long. + # There is too much data recorded. 
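+        # A negative i_shift drops the last |i_shift| points of the forward
+        # Stokes and the first |i_shift| points of the backward Stokes, so
+        # both channels keep nx + i_shift points on the shared x-axis.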
+ st = ds.st.data[:i_shift] + ast = ds.ast.data[:i_shift] + rst = ds.rst.data[-i_shift:] + rast = ds.rast.data[-i_shift:] + x2 = ds.x.data[:i_shift] + # TMP2 = ds.tmp.data[:i_shift] - elif ( - subtract_from_dataarray is None - and not subtract_reference_from_dataarray - and ref_temp_broadcasted - ): - # Broadcast the reference temperature to the length of the stretch - arg1 = dataarray.sel(x=stretch).data - arg2 = da.broadcast_to(reference_dataset[k].data, arg1.shape) - out[k].append(arg2) + else: + # The cable was configured to be too short. + # Part of the cable is not measured. + st = ds["st"].data[i_shift:] + ast = ds["ast"].data[i_shift:] + rst = ds["rst"].data[:nx2] + rast = ds["rast"].data[:nx2] + x2 = ds["x"].data[i_shift:] + # TMP2 = ds.tmp.data[i_shift:] - elif ( - subtract_from_dataarray is None - and not subtract_reference_from_dataarray - and not ref_temp_broadcasted - ): - # calculate std wrt mean value - arg1 = dataarray.sel(x=stretch).data - out[k].append(arg1) + d2_coords = dict(ds.coords) + d2_coords["x"] = xr.DataArray(data=x2, dims=("x",), attrs=ds["x"].attrs) - if calc_per == "stretch": - out[k] = [func(argi, **func_kwargs) for argi in out[k]] + d2_data = dict(ds.data_vars) + for k in ds.data_vars: + if "x" in ds[k].dims and k in d2_data: + del d2_data[k] - elif calc_per == "section": - # flatten the out_dict to sort them - start = [i.start for i in section] - i_sorted = np.argsort(start) - out_flat_sort = [out[k][i] for i in i_sorted] - out[k] = func(concat(out_flat_sort), **func_kwargs) + new_data = (("st", st), ("ast", ast), ("rst", rst), ("rast", rast)) - elif calc_per == "all": - pass + for k, v in new_data: + d2_data[k] = xr.DataArray(data=v, dims=ds[k].dims, attrs=ds[k].attrs) - if calc_per == "all": - # flatten the out_dict to sort them - start = [item.start for sublist in sections.values() for item in sublist] - i_sorted = np.argsort(start) - out_flat = [item for sublist in out.values() for item in sublist] - out_flat_sort = [out_flat[i] for i in i_sorted] - out = func(concat(out_flat_sort, axis=0), **func_kwargs) + not_included = [k for k in ds.data_vars if k not in d2_data] + if not_included and verbose: + print("I dont know what to do with the following data", not_included) - if hasattr(out, "chunks") and len(out.chunks) > 0 and "x" in dataarray.dims: - # also sum the chunksize in the x dimension - # first find out where the x dim is - ixdim = dataarray.dims.index("x") - c_old = out.chunks - c_new = list(c_old) - c_new[ixdim] = sum(c_old[ixdim]) - out = out.rechunk(c_new) + return DataStore(data_vars=d2_data, coords=d2_coords, attrs=ds.attrs) - return out +def suggest_cable_shift_double_ended( + ds: "DataStore", + irange: npt.NDArray[np.int_], + plot_result: bool = True, + **fig_kwargs, +) -> tuple[int, int]: + """The cable length was initially configured during the DTS measurement. + For double ended measurements it is important to enter the correct length + so that the forward channel and the backward channel are aligned. 
-class ParameterIndexDoubleEnded: - """ - npar = 1 + 2 * nt + nx + 2 * nt * nta - assert pval.size == npar - assert p_var.size == npar - if calc_cov: - assert p_cov.shape == (npar, npar) - gamma = pval[0] - d_fw = pval[1:nt + 1] - d_bw = pval[1 + nt:1 + 2 * nt] - alpha = pval[1 + 2 * nt:1 + 2 * nt + nx] - # store calibration parameters in DataStore - self["gamma"] = (tuple(), gamma) - self["alpha"] = (('x',), alpha) - self["df"] = (('time',), d_fw) - self["db"] = (('time',), d_bw) - if nta > 0: - ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) - self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) - """ + This function can be used to find the shift of the backward channel so + that the forward channel and the backward channel are aligned. The shift + index refers to the x-dimension. - def __init__(self, nt, nx, nta, fix_gamma=False, fix_alpha=False): - self.nt = nt - self.nx = nx - self.nta = nta - self.fix_gamma = fix_gamma - self.fix_alpha = fix_alpha + The attenuation should be approximately a straight line with jumps at the + splices. Temperature independent and invariant over time. The following + objective functions seems to do the job at determining the best shift for + which the attenuation is most straight. - @property - def all(self): - return np.concatenate( - (self.gamma, self.df, self.db, self.alpha, self.ta.flatten(order="F")) - ) + Err1 sums the first derivative. Is a proxy for the length of the + attenuation line. Err2 sums the second derivative. Is a proxy for the + wiggelyness of the line. - @property - def npar(self): - if not self.fix_gamma and not self.fix_alpha: - return 1 + 2 * self.nt + self.nx + 2 * self.nt * self.nta - elif self.fix_gamma and not self.fix_alpha: - return 2 * self.nt + self.nx + 2 * self.nt * self.nta - elif not self.fix_gamma and self.fix_alpha: - return 1 + 2 * self.nt + 2 * self.nt * self.nta - elif self.fix_gamma and self.fix_alpha: - return 2 * self.nt + 2 * self.nt * self.nta + The top plot shows the origional Stokes and the origional and shifted + anti-Stokes The bottom plot is generated that shows the two objective + functions - @property - def gamma(self): - if self.fix_gamma: - return [] - else: - return [0] - @property - def df(self): - if self.fix_gamma: - return list(range(self.nt)) - else: - return list(range(1, self.nt + 1)) + Parameters + ---------- + ds : DataSore object + DataStore object that needs to be shifted + irange : array-like + a numpy array with data of type int. Containing all the shift index + that are tested. + Example: np.arange(-250, 200, 1, dtype=int). It shifts the return + scattering with 250 indices. Calculates err1 and err2. Then shifts + the return scattering with 249 indices. Calculates err1 and err2. The + lowest err1 and err2 are suggested as best shift options. + plot_result : bool + Plot the summed error as a function of the shift. 
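+    fig_kwargs : dict
+        Additional keyword arguments passed to ``matplotlib.pyplot.subplots``
+        when ``plot_result`` is True.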
- @property - def db(self): - if self.fix_gamma: - return list(range(self.nt, 2 * self.nt)) - else: - return list(range(1 + self.nt, 1 + 2 * self.nt)) + Returns + ------- + ishift1: int + Suggested shift based on Err1 + ishift2: int + Suggested shift based on Err2 + """ + err1 = [] + err2 = [] - @property - def alpha(self): - if self.fix_alpha: - return [] - elif self.fix_gamma: - return list(range(2 * self.nt, 1 + 2 * self.nt + self.nx)) - elif not self.fix_gamma: - return list(range(1 + 2 * self.nt, 1 + 2 * self.nt + self.nx)) + for shift in irange: + i_shift = int(shift) # int() because iterating irange does not narrow type. + nx = ds["x"].size + nx2 = nx - i_shift + if i_shift < 0: + # The cable was configured to be too long. There is too much data recorded. + st = ds["st"].data[:i_shift] + ast = ds["ast"].data[:i_shift] + rst = ds["rst"].data[-i_shift:] + rast = ds["rast"].data[-i_shift:] + x2 = ds["x"].data[:i_shift] + else: + # The cable was configured to be too short. Part of the cable is not measured. + st = ds["st"].data[i_shift:] + ast = ds["ast"].data[i_shift:] + rst = ds["rst"].data[:nx2] + rast = ds["rast"].data[:nx2] + x2 = ds["x"].data[i_shift:] - @property - def ta(self): - if self.nta == 0: - return np.zeros((self.nt, 2, 0)) - elif not self.fix_gamma and not self.fix_alpha: - arr = np.arange(1 + 2 * self.nt + self.nx, self.npar) - elif self.fix_gamma and not self.fix_alpha: - arr = np.arange(2 * self.nt + self.nx, self.npar) - elif not self.fix_gamma and self.fix_alpha: - arr = np.arange(1 + 2 * self.nt, self.npar) - elif self.fix_gamma and self.fix_alpha: - arr = np.arange(2 * self.nt, self.npar) + i_f = np.log(st / ast) + i_b = np.log(rst / rast) - return arr.reshape((self.nt, 2, self.nta), order="F") + att = (i_b - i_f) / 2 # varianble E in article - @property - def taf(self): - """ - Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. - ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_fw'] = (('time', 'trans_att'), ta[:, 0, :]) - """ - return self.ta[:, 0, :].flatten(order="C") + att_dif1 = np.diff(att, n=1, axis=0) + att_x_dif1 = 0.5 * x2[1:] + 0.5 * x2[:-1] + err1_mask = np.logical_and(att_x_dif1 > 1.0, att_x_dif1 < 150.0) + err1.append(np.nansum(np.abs(att_dif1[err1_mask]))) - @property - def tab(self): - """ - Use `.reshape((nt, nta))` to convert array to (time-dim and transatt-dim). Order is the default C order. - ta = pval[1 + 2 * nt + nx:].reshape((nt, 2, nta), order='F') - self['talpha_bw'] = (('time', 'trans_att'), ta[:, 1, :]) - """ - return self.ta[:, 1, :].flatten(order="C") + att_dif2 = np.diff(att, n=2, axis=0) + att_x_dif2 = x2[1:-1] + err2_mask = np.logical_and(att_x_dif2 > 1.0, att_x_dif2 < 150.0) + err2.append(np.nansum(np.abs(att_dif2[err2_mask]))) - def get_ta_pars(self, pval): - if self.nta > 0: - if pval.ndim == 1: - return np.take_along_axis(pval[None, None], self.ta, axis=-1) + # int() is required for typing. 
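+    # Each objective suggests its own best shift: err1 (summed first
+    # derivative of the attenuation, a proxy for the line's length) and err2
+    # (summed second derivative, a proxy for its wiggliness).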
+ ishift1 = int(irange[np.argmin(err1, axis=0)]) + ishift2 = int(irange[np.argmin(err2, axis=0)]) - else: - # assume shape is (a, npar) and returns shape (nt, 2, nta, a) - assert pval.shape[1] == self.npar and pval.ndim == 2 - return np.stack([self.get_ta_pars(v) for v in pval], axis=-1) + if plot_result: + if fig_kwargs is None: + fig_kwargs = {} - else: - return np.zeros(shape=(self.nt, 2, 0)) + f, (ax0, ax1) = plt.subplots(2, 1, sharex=False, **fig_kwargs) + f.suptitle(f"best shift is {ishift1} or {ishift2}") - def get_taf_pars(self, pval): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" - return self.get_ta_pars(pval=pval)[:, 0, :] + dt = ds.isel(time=0) + x = dt["x"].data + y = dt["st"].data + ax0.plot(x, y, label="ST original") + y = dt["rst"].data + ax0.plot(x, y, label="REV-ST original") - def get_tab_pars(self, pval): - """returns taf parameters of shape (nt, nta) or (nt, nta, a)""" - return self.get_ta_pars(pval=pval)[:, 1, :] + dtsh1 = shift_double_ended(dt, ishift1) + dtsh2 = shift_double_ended(dt, ishift2) + x1 = dtsh1["x"].data + x2 = dtsh2["x"].data + y1 = dtsh1["rst"].data + y2 = dtsh2["rst"].data + ax0.plot(x1, y1, label=f"ST i_shift={ishift1}") + ax0.plot(x2, y2, label=f"ST i_shift={ishift2}") + ax0.set_xlabel("x (m)") + ax0.legend() - def get_taf_values(self, pval, x, trans_att, axis=""): - """returns taf parameters of shape (nx, nt)""" - pval = np.atleast_2d(pval) + ax2 = ax1.twinx() + ax1.plot(irange, err1, c="red", label="1 deriv") + ax2.plot(irange, err2, c="blue", label="2 deriv") + ax1.axvline( + ishift1, c="red", linewidth=0.8, label=f"1 deriv. i_shift={ishift1}" + ) + ax2.axvline( + ishift2, c="blue", linewidth=0.8, label=f"2 deriv. i_shift={ishift1}" + ) + ax1.set_xlabel("i_shift") + ax1.legend(loc=2) # left axis + ax2.legend(loc=1) # right axis - assert pval.ndim == 2 and pval.shape[1] == self.npar + plt.tight_layout() - arr_out = np.zeros((self.nx, self.nt)) + return ishift1, ishift2 - if self.nta == 0: - pass - elif axis == "": - assert pval.shape[0] == 1 +def ufunc_per_section_helper( + sections=None, + func=None, + x_coords=None, + reference_dataset=None, + dataarray=None, + subtract_from_dataarray=None, + subtract_reference_from_dataarray=False, + ref_temp_broadcasted=False, + calc_per="stretch", + **func_kwargs, +): + """ + User function applied to parts of the cable. Super useful, + many options and slightly + complicated. - arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, 1, nt) + The function `func` is taken over all the timesteps and calculated + per `calc_per`. This + is returned as a dictionary - for tai, taxi in zip(arr, trans_att): - arr_out[x >= taxi] += tai + Parameters + ---------- + sections : Dict[str, List[slice]], optional + If `None` is supplied, `ds.sections` is used. Define calibration + sections. Each section requires a reference temperature time series, + such as the temperature measured by an external temperature sensor. + They should already be part of the DataStore object. `sections` + is defined with a dictionary with its keywords of the + names of the reference temperature time series. Its values are + lists of slice objects, where each slice object is a fiber stretch + that has the reference temperature. Afterwards, `sections` is stored + under `ds.sections`. + func : callable, str + A numpy function, or lambda function to apply to each 'calc_per'. + x_coords : xarray.DataArray, optional + x-coordinates, stored as ds.x. If supplied, returns the x-indices of + the reference sections. 
+ reference_dataset : xarray.Dataset or Dict, optional + Contains the reference temperature timeseries refered to in `sections`. + Not required if `x_indices`. + dataarray : xarray.DataArray, optional + Pass your DataArray of which you want to compute the statistics. Has an + (x,) dimension or (x, time) dimensions. + subtract_from_dataarray : xarray.DataArray, optional + Pass your DataArray of which you want to subtract from `dataarray` before you + compute the statistics. Has an (x,) dimension or (x, time) dimensions. + subtract_reference_from_dataarray : bool + If True the reference temperature according to sections is subtracted from + dataarray before computing statistics + ref_temp_broadcasted : bool + Use if you want to return the reference temperature of shape of the reference + sections + calc_per : {'all', 'section', 'stretch'} + func_kwargs : dict + Dictionary with options that are passed to func - elif axis == "x": - assert pval.shape[0] == self.nx + Returns + ------- - arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) # (nta, nx, nt) + Examples + -------- - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (x, t) - arr_out[x >= taxi] += tai[x >= taxi] + 1. Calculate the variance of the residuals in the along ALL the\ + reference sections wrt the temperature of the water baths - elif axis == "time": - assert pval.shape[0] == self.nt + >>> tmpf_var = ufunc_per_section_helper( + >>> func='var', + >>> calc_per='all', + >>> dataarray=d['tmpf'], + >>> subtract_reference_from_dataarray=True) - # arr (nt, nta, nt) to have shape (nta, nt, nt) - arr = np.transpose(self.get_taf_pars(pval), axes=(1, 2, 0)) + 2. Calculate the variance of the residuals in the along PER\ + reference section wrt the temperature of the water baths - for tai, taxi in zip(arr, trans_att): - # loop over nta, tai has shape (t, t) - arr_out[x >= taxi] += np.diag(tai)[None] + >>> tmpf_var = ufunc_per_section_helper + >>> func='var', + >>> calc_per='stretch', + >>> dataarray=d['tmpf'], + >>> subtract_reference_from_dataarray=True) - return arr_out + 3. Calculate the variance of the residuals in the along PER\ + water bath wrt the temperature of the water baths - def get_tab_values(self, pval, x, trans_att, axis=""): - """returns tab parameters of shape (nx, nt)""" - assert pval.shape[-1] == self.npar + >>> tmpf_var = ufunc_per_section_helper( + >>> func='var', + >>> calc_per='section', + >>> dataarray=d['tmpf'], + >>> subtract_reference_from_dataarray=True) - arr_out = np.zeros((self.nx, self.nt)) + 4. Obtain the coordinates of the measurements per section - if self.nta == 0: - pass + >>> locs = ufunc_per_section_helper( + >>> func=None, + >>> dataarray=d.x, + >>> subtract_reference_from_dataarray=False, + >>> ref_temp_broadcasted=False, + >>> calc_per='stretch') - elif axis == "": - pval = np.squeeze(pval) - assert pval.ndim == 1 - arr = np.transpose(self.get_tab_pars(pval), axes=(1, 0)) + 5. Number of observations per stretch - for tai, taxi in zip(arr, trans_att): - arr_out[x < taxi] += tai + >>> nlocs = ufunc_per_section_helper( + >>> func=len, + >>> dataarray=d.x, + >>> subtract_reference_from_dataarray=False, + >>> ref_temp_broadcasted=False, + >>> calc_per='stretch') - elif axis == "x": - assert pval.ndim == 2 and pval.shape[0] == self.nx + 6. broadcast the temperature of the reference sections to\ + stretch/section/all dimensions. The value of the reference\ + temperature (a timeseries) is broadcasted to the shape of self[\ + label]. The dataarray is not used for anything else. 
-            arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0))
+    >>> temp_ref = ufunc_per_section_helper(
+    >>>     dataarray=d["st"],
+    >>>     ref_temp_broadcasted=True,
+    >>>     calc_per='all')
+
-            for tai, taxi in zip(arr, trans_att):
-                # loop over nta, tai has shape (x, t)
-                arr_out[x < taxi] += tai[x < taxi]
+    7. x-coordinate index
+
-        elif axis == "time":
-            assert pval.ndim == 2 and pval.shape[0] == self.nt
+    >>> ix_loc = ufunc_per_section_helper(x_coords=d.x)
+
-            # arr (nt, nta, nt) to have shape (nta, nt, nt)
-            arr = np.transpose(self.get_tab_pars(pval), axes=(1, 2, 0))
-            for tai, taxi in zip(arr, trans_att):
-                # loop over nta, tai has shape (t, t)
-                arr_out[x < taxi] += np.diag(tai)
+    Note
+    ----
+    If `dataarray` or `subtract_from_dataarray` is a Dask array, a Dask
+    array is returned; otherwise a numpy array is returned.
+    """
+    if not func:
-        return arr_out
+        def func(a):
+            """
+            Parameters
+            ----------
+            a
+
-
-
-class ParameterIndexSingleEnded:
-    """
-    if parameter fixed, they are not in
-    npar = 1 + 1 + nt + nta * nt
-    """
+            Returns
+            -------
+
-    def __init__(self, nt, nx, nta, includes_alpha=False, includes_dalpha=True):
-        assert not (
-            includes_alpha and includes_dalpha
-        ), "Cannot hold both dalpha and alpha"
-        self.nt = nt
-        self.nx = nx
-        self.nta = nta
-        self.includes_alpha = includes_alpha
-        self.includes_dalpha = includes_dalpha
+            """
+            return a
+
-    @property
-    def all(self):
-        return np.concatenate(
-            (self.gamma, self.dalpha, self.alpha, self.c, self.ta.flatten(order="F"))
-        )
+    elif isinstance(func, str) and func == "var":
+
-    @property
-    def npar(self):
-        if self.includes_alpha:
-            return 1 + self.nx + self.nt + self.nta * self.nt
-        elif self.includes_dalpha:
-            return 1 + 1 + self.nt + self.nta * self.nt
-        else:
-            return 1 + self.nt + self.nta * self.nt
+        def func(a):
+            """
-
-    @property
-    def gamma(self):
-        return [0]
+            Parameters
+            ----------
+            a
+
-    @property
-    def dalpha(self):
-        if self.includes_dalpha:
-            return [1]
-        else:
-            return []
+            Returns
+            -------
+
-    @property
-    def alpha(self):
-        if self.includes_alpha:
-            return list(range(1, 1 + self.nx))
-        else:
-            return []
+            """
+            return np.var(a, ddof=1)
+
-    @property
-    def c(self):
-        if self.includes_alpha:
-            return list(range(1 + self.nx, 1 + self.nx + self.nt))
-        elif self.includes_dalpha:
-            return list(range(1 + 1, 1 + 1 + self.nt))
-        else:
-            return list(range(1, 1 + self.nt))
+    else:
+        assert callable(func)
+
-    @property
-    def taf(self):
-        """returns taf parameters of shape (nt, nta) or (nt, nta, a)"""
-        # ta = p_val[-nt * nta:].reshape((nt, nta), order='F')
-        # self["talpha"] = (('time', 'trans_att'), ta[:, :])
-        if self.includes_alpha:
-            return np.arange(
-                1 + self.nx + self.nt, 1 + self.nx + self.nt + self.nt * self.nta
-            ).reshape((self.nt, self.nta), order="F")
-        elif self.includes_dalpha:
-            return np.arange(
-                1 + 1 + self.nt, 1 + 1 + self.nt + self.nt * self.nta
-            ).reshape((self.nt, self.nta), order="F")
-        else:
-            return np.arange(1 + self.nt, 1 + self.nt + self.nt * self.nta).reshape(
-                (self.nt, self.nta), order="F"
-            )
+    assert calc_per in ["all", "section", "stretch"]
+
-    def get_taf_pars(self, pval):
-        if self.nta > 0:
-            if pval.ndim == 1:
-                # returns shape (nta, nt)
-                assert len(pval) == self.npar, "Length of pval is incorrect"
-                return np.stack([pval[tafi] for tafi in self.taf.T])
+    if x_coords is None and (
+        (dataarray is not None and hasattr(dataarray.data, "chunks"))
+        or (
+            subtract_from_dataarray is not None
+            and hasattr(subtract_from_dataarray.data, "chunks")
+        )
+    ):
+        concat = da.concatenate
+    else:
+        concat = np.concatenate
+
-            else:
-                # assume shape is (a, npar) and returns shape (nta, nt, a)
-                assert pval.shape[1] == self.npar and pval.ndim == 2
-                return np.stack([self.get_taf_pars(v) for v in pval], axis=-1)
+    out = dict()
+
-        else:
-            return np.zeros(shape=(self.nt, 0))
+    for k, section in sections.items():
+        out[k] = []
+        for stretch in section:
+            if x_coords is not None:
+                # get indices from stretches
+                assert subtract_from_dataarray is None
+                assert not subtract_reference_from_dataarray
+                assert not ref_temp_broadcasted
+                # so it is sliceable with x-indices
+                _x_indices = x_coords.astype(int) * 0 + np.arange(x_coords.size)
+                arg1 = _x_indices.sel(x=stretch).data
+                out[k].append(arg1)
+
-    def get_taf_values(self, pval, x, trans_att, axis=""):
-        """returns taf parameters of shape (nx, nt)"""
-        # assert pval.ndim == 2 and pval.shape[1] == self.npar
+            elif (
+                subtract_from_dataarray is not None
+                and not subtract_reference_from_dataarray
+                and not ref_temp_broadcasted
+            ):
+                # calculate std wrt other series
+                arg1 = dataarray.sel(x=stretch).data
+                arg2 = subtract_from_dataarray.sel(x=stretch).data
+                out[k].append(arg1 - arg2)
+
-        arr_out = np.zeros((self.nx, self.nt))
+            elif (
+                subtract_from_dataarray is None
+                and subtract_reference_from_dataarray
+                and not ref_temp_broadcasted
+            ):
+                # calculate std wrt reference temperature of the corresponding bath
+                arg1 = dataarray.sel(x=stretch).data
+                arg2 = reference_dataset[k].data
+                out[k].append(arg1 - arg2)
+
-        if self.nta == 0:
-            pass
+            elif (
+                subtract_from_dataarray is None
+                and not subtract_reference_from_dataarray
+                and ref_temp_broadcasted
+            ):
+                # Broadcast the reference temperature to the length of the stretch
+                arg1 = dataarray.sel(x=stretch).data
+                arg2 = da.broadcast_to(reference_dataset[k].data, arg1.shape)
+                out[k].append(arg2)
+
-        elif axis == "":
-            pval = pval.flatten()
-            assert pval.shape == (self.npar,)
-            arr = self.get_taf_pars(pval)
-            assert arr.shape == (
-                self.nta,
-                self.nt,
-            )
+            elif (
+                subtract_from_dataarray is None
+                and not subtract_reference_from_dataarray
+                and not ref_temp_broadcasted
+            ):
+                # calculate std wrt mean value
+                arg1 = dataarray.sel(x=stretch).data
+                out[k].append(arg1)
+
-            for tai, taxi in zip(arr, trans_att):
-                arr_out[x >= taxi] += tai
+        if calc_per == "stretch":
+            out[k] = [func(argi, **func_kwargs) for argi in out[k]]
+
-        elif axis == "x":
-            assert pval.shape == (self.nx, self.npar)
-            arr = self.get_taf_pars(pval)
-            assert arr.shape == (self.nta, self.nx, self.nt)
+        elif calc_per == "section":
+            # flatten the out_dict to sort them
+            start = [i.start for i in section]
+            i_sorted = np.argsort(start)
+            out_flat_sort = [out[k][i] for i in i_sorted]
+            out[k] = func(concat(out_flat_sort), **func_kwargs)
+
-            for tai, taxi in zip(arr, trans_att):
-                # loop over nta, tai has shape (x, t)
-                arr_out[x >= taxi] += tai[x >= taxi]
+        elif calc_per == "all":
+            pass
+
-        elif axis == "time":
-            assert pval.shape == (self.nt, self.npar)
-            arr = self.get_taf_pars(pval)
-            assert arr.shape == (self.nta, self.nt, self.nt)
+    if calc_per == "all":
+        # flatten the out_dict to sort them
+        start = [item.start for sublist in sections.values() for item in sublist]
+        i_sorted = np.argsort(start)
+        out_flat = [item for sublist in out.values() for item in sublist]
+        out_flat_sort = [out_flat[i] for i in i_sorted]
+        out = func(concat(out_flat_sort, axis=0), **func_kwargs)
+
-            for tai, taxi in zip(arr, trans_att):
-                # loop over nta, tai has shape (t, t)
-                arr_out[x >= taxi] += np.diag(tai)[None]
+    if hasattr(out, "chunks") and len(out.chunks) > 0 and "x" in dataarray.dims:
+        # also sum the chunksize in the x dimension
+        # first find out where the x dim is
+        ixdim = dataarray.dims.index("x")
+        c_old = out.chunks
+        c_new = list(c_old)
+        c_new[ixdim] = sum(c_old[ixdim])
+        out = out.rechunk(c_new)
+
-        return arr_out
+    return out

From 155fafe16c30a73d2099692716c63ada9f257b75 Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Tue, 8 Aug 2023 19:12:43 +0200
Subject: [PATCH 24/26] Removed installation page from docs

---
 docs/index.rst        | 1 -
 docs/installation.rst | 8 --------
 2 files changed, 9 deletions(-)
 delete mode 100644 docs/installation.rst

diff --git a/docs/index.rst b/docs/index.rst
index f184b6f8..894b4e21 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,7 +6,6 @@ Contents
    :maxdepth: 3
 
    readme
-   installation
   learn_by_examples
   reference/index
   contributing
diff --git a/docs/installation.rst b/docs/installation.rst
deleted file mode 100644
index 98bf9bbc..00000000
--- a/docs/installation.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-============
-Installation
-============
-
-At the command line::
-
-    pip install dtscalibration
-

From 62ffb6c44816f7b2b8a1e486536e99c1df7d7ccc Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Tue, 8 Aug 2023 19:14:14 +0200
Subject: [PATCH 25/26] Reformulated CHANGELOG.rst entry

Suggested by BSchilperoort

Co-authored-by: Bart Schilperoort
---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d8bf41cb..c5eeb81f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,7 +12,7 @@ Added
 Fixed
 
 * Single-ended measurements with `fix_alpha` failed due to a bug introduced in v2.0.0 ([#173](https://github.com/dtscalibration/python-dts-calibration/pull/173)).
-* Fixes headers in example notebooks and their appearance in the docs
+* Headers in example notebooks and their appearance in the docs are now at correct levels
 
 Changed

From 0c8b1f0e228e2d1be84138f88f66eda3d04c90c0 Mon Sep 17 00:00:00 2001
From: Bas des Tombe
Date: Tue, 8 Aug 2023 19:43:39 +0200
Subject: [PATCH 26/26] Cleared old dangling readthedocs files

---
 docs/_config.yml            |  1 -
 docs/conf.py                |  2 +-
 docs/nb_examples_to_docs.py | 95 ------------------------------------
 docs/requirements.txt       |  1 -
 4 files changed, 1 insertion(+), 98 deletions(-)
 delete mode 100644 docs/_config.yml
 delete mode 100644 docs/nb_examples_to_docs.py
 delete mode 100644 docs/requirements.txt

diff --git a/docs/_config.yml b/docs/_config.yml
deleted file mode 100644
index c4192631..00000000
--- a/docs/_config.yml
+++ /dev/null
@@ -1 +0,0 @@
-theme: jekyll-theme-cayman
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
index 5e49338e..6e28eadd 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -70,7 +70,7 @@
 napoleon_use_param = False
 
 # sphinx_automodapi.automodapi
-numpydoc_show_class_members = False
+numpydoc_show_class_members = True
 
 # -- nbsphinx configuration --
 nbsphinx_allow_errors = False
diff --git a/docs/nb_examples_to_docs.py b/docs/nb_examples_to_docs.py
deleted file mode 100644
index ac6a6a3e..00000000
--- a/docs/nb_examples_to_docs.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# coding=utf-8
-import glob
-import os
-import shutil
-from subprocess import check_call
-
-clean_nb = False  # save clean notebook to github
-
-try:
-    # this file is excecuted as script
-    wd = os.path.dirname(os.path.realpath(__file__))
-    print("run as script: wd", wd)
-except:
-    # Excecuted from console. pwd = ./docs
-    wd = os.getcwd()
-    print("run from console: wd", wd)
-    pass
-
-file_ext = "*.ipynb"
-
-# ./examples/notebooks
-inpath = os.path.join(wd, "..", "examples", "notebooks")
-
-# ./docs/examples/notebooks
-outpath = os.path.join(wd, "examples", "notebooks")
-fp_index = os.path.join(wd, "examples", "index.rst")
-
-# clean outputdir
-shutil.rmtree(outpath)
-os.makedirs(outpath)
-
-filepathlist = sorted(glob.glob(os.path.join(inpath, file_ext)))
-filenamelist = [os.path.basename(path) for path in filepathlist]
-
-for fp, fn in zip(filepathlist, filenamelist):
-    if clean_nb:
-        # save clean notebook to github
-        check_call(
-            [
-                "jupyter",
-                "nbconvert",
-                "--clear-output",
-                "--ClearOutputPreprocessor.enabled=True",
-                "--inplace",
-                fp,
-            ]
-        )
-    else:
-        check_call(
-            [
-                "jupyter",
-                "nbconvert",
-                "--execute",
-                "--ExecutePreprocessor.kernel_name=python",
-                "--KernelGatewayApp.force_kernel_name=python",
-                "--ExecutePreprocessor.timeout=60",
-                "--inplace",
-                fp,
-            ]
-        )
-    # run the notebook to:
-    # 1) check whether no errors occur.
-    # 2) save and show outputconvert notebook to rst for documentation
-    # outfilepath = os.path.join(outpath, fn)
-    check_call(
-        [
-            "jupyter",
-            "nbconvert",
-            "--execute",
-            "--to",
-            "rst",
-            "--ExecutePreprocessor.kernel_name=python",
-            "--KernelGatewayApp.force_kernel_name=python",
-            "--ExecutePreprocessor.timeout=60",
-            "--output-dir",
-            outpath,
-            "--output",
-            fn,
-            fp,
-        ]
-    )
-
-# write index file to toc
-fp_index = os.path.join(wd, "examples", "index.rst")
-s = """Learn by Examples
-=================
-
-.. toctree::
-"""
-
-with open(fp_index, "w+") as fh:
-    fh.write(s)
-    for fn in filenamelist:
-        sf = "    notebooks/{}.rst\n".format(fn)
-        fh.write(sf)
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index 142b6ca3..00000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--e .[docs]
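
For quick reference, below is a minimal usage sketch of the `ufunc_per_section_helper` introduced in the patches above. It is not part of the patch series itself: the import path, the synthetic dataset, and the section name are illustrative assumptions.

# Usage sketch only -- the module path and all names below are assumptions.
import numpy as np
import xarray as xr

from dtscalibration.datastore_utils import ufunc_per_section_helper  # assumed location

# Toy dataset with dims (x, time) and a single reference bath time series.
ds = xr.Dataset(
    {
        "tmpf": (("x", "time"), 20.0 + 0.1 * np.random.randn(100, 5)),
        "probe1Temperature": (("time",), np.full(5, 20.0)),
    },
    coords={"x": np.linspace(0.0, 100.0, 100), "time": np.arange(5)},
)
sections = {"probe1Temperature": [slice(10.0, 30.0), slice(60.0, 80.0)]}

# Variance of the temperature residuals over ALL reference sections.
tmpf_var = ufunc_per_section_helper(
    sections=sections,
    func="var",
    dataarray=ds["tmpf"],
    reference_dataset=ds,
    subtract_reference_from_dataarray=True,
    calc_per="all",
)

# Number of observations per stretch.
nlocs = ufunc_per_section_helper(
    sections=sections, func=len, dataarray=ds["x"], calc_per="stretch"
)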