echotools/GUTILS sync
 * __init__.py
   * SciPy 1.13.0 warnings
 * nc.py
   * Skip writing extras file for echograms that are short
 * slocum/__init__.py
   * enable deeper debugging
 * slocum/bin/convertDbds.sh
    * force grep to treat binary *bd files as ASCII text
 * slocum/echotools/teledyne.py
   * allow PYTHONWARNINGS to override coded warnings
    * utilize dbdreader skip_initial_line feature
   * numpy datetime conversion warning
   * flake8 file matching update
   * ensure file descriptors are released
   * merge duplicate time coordinate data from DBD
   * flake8 updates
jr3cermak committed Jul 10, 2023
1 parent dca53e2 commit cdd8f41
Showing 5 changed files with 132 additions and 27 deletions.
7 changes: 6 additions & 1 deletion gutils/__init__.py
@@ -12,7 +12,12 @@

import numpy as np
import pandas as pd
from scipy.signal import boxcar, convolve
try:
# SciPy 1.13.0 DeprecationWarning
from scipy.signal.windows import boxcar
from scipy.signal import convolve
except ImportError:
from scipy.signal import boxcar, convolve

from pocean.meta import MetaInterface
from pocean.utils import (
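Not part of the commit, but a minimal sketch of why the fallback matters: the same boxcar smoothing call works with both the new (SciPy >= 1.13) and the old import location. The helper name and window size below are illustrative.

import numpy as np

try:
    # SciPy 1.13+ location of the window functions
    from scipy.signal.windows import boxcar
    from scipy.signal import convolve
except ImportError:
    # Older SciPy still exposes boxcar directly under scipy.signal
    from scipy.signal import boxcar, convolve

def boxcar_smooth(series, size=5):
    # Normalized moving-average (boxcar) smoothing of a 1-D array
    window = boxcar(size)
    return convolve(series, window / window.sum(), mode='same')

print(boxcar_smooth(np.arange(10.0)))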
18 changes: 15 additions & 3 deletions gutils/nc.py
@@ -396,10 +396,13 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes

if not extras_df.empty:

# Skip extra flag
skip_extra = False

# Write the extras dimension to a new profile file
profile_extras = extras_df.loc[extras_df.profile == pi].copy()
if profile_extras.empty:
continue
skip_extra = True

# Standardize the columns of the "extras" from the matched profile
profile_extras.loc[:, 't'] = profile_extras.index
@@ -420,9 +423,18 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes
profile_extras.loc[:, c] = profile_extras[c].astype(profile[c].dtype)
"""

# If there are fewer than 3 time coordinates with an echogram, do not
# write out the profile.
if 'echogram_sv' in profile_extras.columns:
if 't' not in profile_extras.columns:
skip_extra = True
if len(pd.unique(profile_extras['t'])) < 3:
skip_extra = True

try:
cr = create_profile_netcdf(attrs, profile_extras, output_path, mode + '_extra', profile_id_type)
written_files.append(cr)
if not skip_extra:
cr = create_profile_netcdf(attrs, profile_extras, output_path, mode + '_extra', profile_id_type)
written_files.append(cr)
except BaseException:
L.exception('Error creating extra netCDF profile {}. Skipping.'.format(pi))
continue
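Restating the new guard as a standalone helper (a sketch only; the module keeps the logic inline as shown above): an extras profile is skipped when it is empty, or when it carries an echogram with fewer than three unique time coordinates.

import pandas as pd

def should_skip_extras(profile_extras: pd.DataFrame) -> bool:
    # Mirror of the inline skip_extra logic above
    if profile_extras.empty:
        return True
    if 'echogram_sv' in profile_extras.columns:
        if 't' not in profile_extras.columns:
            return True
        if len(pd.unique(profile_extras['t'])) < 3:
            return True
    return False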
7 changes: 5 additions & 2 deletions gutils/slocum/__init__.py
@@ -672,6 +672,9 @@ def slocum_binary_sorter(x):
# of a reader.
self.extra_kwargs = self.attrs.pop('extra_kwargs', {})

if self.extra_kwargs.get('enable_debug', False):
L.setLevel(logging.DEBUG)

def __del__(self):
# Remove tmpdir
shutil.rmtree(self.tmpdir, ignore_errors=True)
@@ -868,7 +871,7 @@ def convert(self):
# upstream expects *.dat files, so produce
# a *.pq file and link it to a *.dat file
try:
os.symlink(pqFile, datFile)
os.symlink(os.path.basename(pqFile), datFile)
except FileExistsError:
L.error(f"Symlink already exists for {datFile}")
else:
@@ -888,7 +891,7 @@ def convert(self):
echograms_attrs = self.extra_kwargs.get('echograms', {})
enable_ascii = echograms_attrs.get('enable_ascii', False)
enable_image = echograms_attrs.get('enable_image', False)
enable_debug = echograms_attrs.get('enable_debug', False)
enable_debug = self.extra_kwargs.get('enable_debug', False)

if enable_debug:
pargs.remove('-q')
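A short sketch (hypothetical paths) of what the symlink change buys: linking by basename records a relative target, so the *.dat alias still resolves if the processing directory is later moved or mounted under a different path, whereas an absolute target would dangle.

import os

pqFile = "/tmp/gutils_proc/unit_123-2023-100-0-0.pq"    # hypothetical
datFile = "/tmp/gutils_proc/unit_123-2023-100-0-0.dat"  # hypothetical

try:
    # Target stored as "unit_123-2023-100-0-0.pq", relative to the link's directory
    os.symlink(os.path.basename(pqFile), datFile)
except FileExistsError:
    pass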
4 changes: 2 additions & 2 deletions gutils/slocum/bin/convertDbds.sh
@@ -425,7 +425,7 @@ do

# dbdSource must have the ascii header line dbd_label: to be a valid *bd
# file
is_dbd=$(grep 'dbd_label:' $dbdSource);
is_dbd=$(grep -a 'dbd_label:' $dbdSource);
if [ -z "$is_dbd" ]
then
echo "Invalid flight source file: $dbdSource" >&2;
@@ -471,7 +471,7 @@ do

# dbdSource must have the ascii header line dbd_label: to be a valid *bd
# file
is_dbd=$(grep 'dbd_label:' $sciSource);
is_dbd=$(grep -a 'dbd_label:' $sciSource);
if [ -z "$is_dbd" ]
then
echo "Invalid science source file: $sciSource" >&2;
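The grep -a change keeps grep from treating the mixed binary/ASCII *bd files as binary and suppressing the matched line. A rough Python equivalent of the header test (an assumption about file layout: the dbd_label: header appears near the start of the file):

def looks_like_dbd(path: str) -> bool:
    # Read the first few KB in binary mode and look for the ASCII header marker
    with open(path, "rb") as fp:
        return b"dbd_label:" in fp.read(4096)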
123 changes: 104 additions & 19 deletions gutils/slocum/echotools/teledyne.py
@@ -8,6 +8,7 @@
import sys
import struct
import subprocess
import warnings
import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
@@ -23,6 +24,21 @@
logging.getLogger("dbdreader").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING)

# Default: let a user-supplied PYTHONWARNINGS setting take precedence over the coded filter
if os.environ.get('PYTHONWARNINGS', None):
pass
else:
warnings.simplefilter("default")

# Development
#warnings.resetwarnings()
#warnings.filterwarnings("error")

# netcdf4 (obscure error)
warnings.filterwarnings("ignore", message='numpy.*size changed.*')
# TODO: Plotting breaks down when there are multiple profiles
warnings.filterwarnings("ignore", message='.*coordinates to pcolormesh.*')
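Restating the intent of the warnings setup above as a sketch: a user-supplied PYTHONWARNINGS value (for example error::DeprecationWarning) is left in force, and the coded "default" filter is only installed when the environment variable is absent.

import os
import warnings

if not os.environ.get('PYTHONWARNINGS'):
    # No user preference: show each unique warning once per location
    warnings.simplefilter("default")
# Otherwise the interpreter already applied the filters from PYTHONWARNINGS
# at startup, and they take precedence.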


class Glider:
'''
@@ -293,7 +309,7 @@ def createFileInventory(self, fileList, cache_dir):
plabel = f"PATH{ct:04d}"
self.data['inventory_paths'][abspath] = plabel

dbdFp = dbdreader.DBD(infile, cacheDir=cache_dir)
dbdFp = dbdreader.DBD(infile, cacheDir=cache_dir, skip_initial_line=False)
dbdData = dbdFp.get(*dbdFp.parameterNames, return_nans=True)
cacheFile = f"{dbdFp.cacheID}.cac"

@@ -365,8 +381,8 @@ def filterFiles(self, start_time=None, end_time=None):
# Convert 0000-00-00 00:00:00 to nan
mask = ds['End'] == '0000-00-00 00:00:00'
ds['End'][mask] = np.nan
ds['Start_dt'] = ds['Start'].astype('datetime64', errors='ignore')
ds['End_dt'] = ds['End'].astype('datetime64', errors='ignore')
ds['Start_dt'] = ds['Start'].astype('datetime64[ns]', errors='ignore')
ds['End_dt'] = ds['End'].astype('datetime64[ns]', errors='ignore')

ds_start_time = pd.to_datetime(start_time).to_datetime64()
ds_end_time = pd.to_datetime(end_time).to_datetime64()
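A toy example of the dtype change (values are illustrative): recent pandas/numpy warn or refuse when asked for a unit-less 'datetime64', so the conversion now names the nanosecond unit explicitly.

import pandas as pd

ds = pd.DataFrame({"Start": ["2023-07-01 12:00:00", "2023-07-02 06:30:00"]})
ds["Start_dt"] = ds["Start"].astype("datetime64[ns]", errors="ignore")
print(ds.dtypes)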
@@ -420,9 +436,9 @@ def filterFiles(self, start_time=None, end_time=None):
# and pull through files that match the base file for
# completeness.
for gkey in groupList.keys():
match = "%s\\." % (os.path.basename(gkey))
ss1 = ds.loc[ds['File'].str.contains(match)]
ss2 = df.loc[df['File'].str.contains(match)]
match = "%s." % (os.path.basename(gkey))
ss1 = ds.loc[ds['File'].str.contains(match, regex=False)]
ss2 = df.loc[df['File'].str.contains(match, regex=False)]
df = pd.concat([df, ss1, ss2]).drop_duplicates()
inv.data['inventory'] = df
fileList = inv.getFullFilenamesFromFileInventory()
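A small sketch (hypothetical file names) of the matching change: with regex=False the trailing dot in the base-file prefix is matched literally, so the "\\."-escaping previously needed for a regex match goes away.

import pandas as pd

df = pd.DataFrame({"File": ["unit_123-2023-100-0-0.sbd",
                            "unit_123-2023-100-0-0.tbd",
                            "unit_123-2023-100-0-10.sbd"]})
match = "unit_123-2023-100-0-0."
subset = df.loc[df["File"].str.contains(match, regex=False)]
# Only the first two rows match; the "-0-10." file is excluded.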
@@ -639,6 +655,7 @@ def loadFileInventory(self, fname):
df = pd.concat([df, pd.Series(rec).to_frame().T], ignore_index=True)

self.data['inventory'] = df
fn.close()

return

@@ -662,7 +679,8 @@ def loadMetadata(self):
# Attempt to read echotools.json configuration file
try:
echotoolsFile = os.path.join(self.args['deploymentDir'], 'echotools.json')
testLoad = json.load(open(echotoolsFile))
with open(echotoolsFile) as fp:
testLoad = json.load(fp)
self.echotools = testLoad
except Exception:
print("WARNING: Unable to parse json echotools file: %s" % (echotoolsFile))
@@ -671,7 +689,8 @@ def loadMetadata(self):
# Attempt to read deployment.json
try:
deploymentFile = os.path.join(self.args['deploymentDir'], 'deployment.json')
testLoad = json.load(open(deploymentFile))
with open(deploymentFile, "r") as fp:
testLoad = json.load(fp)
self.deployment = testLoad
except Exception:
print("ERROR: Unable to parse json deployment file: %s" % (deploymentFile))
@@ -680,7 +699,8 @@ def loadMetadata(self):
# Attempt to read instruments.json
try:
instrumentsFile = os.path.join(self.args['deploymentDir'], 'instruments.json')
testLoad = json.load(open(instrumentsFile))
with open(instrumentsFile, "r") as fp:
testLoad = json.load(fp)
self.instruments = testLoad
except Exception as err:
print(f"ERROR: Unable to parse json instruments file: {instrumentsFile} {err=}")
@@ -699,7 +719,8 @@ def loadMetadata(self):
# Attempt to read <template>.json
try:
templateFile = os.path.join(self.args['templateDir'], self.args['template'])
testLoad = json.load(open(templateFile))
with open(templateFile, "r") as fp:
testLoad = json.load(fp)
self.template = testLoad
except Exception:
print("ERROR: Unable to parse json template file: %s" % (templateFile))
@@ -710,7 +731,8 @@ def loadMetadata(self):
# Attempt to read <dacOverlay>.json
try:
dacOverlayFile = os.path.join(self.args['deploymentDir'], self.args['dacOverlay'])
testLoad = json.load(open(dacOverlayFile))
with open(dacOverlayFile) as fp:
testLoad = json.load(fp)
self.dacOverlay = testLoad
except Exception:
print("ERROR: Unable to parse json DAC overlay metadata file: %s" % (dacOverlayFile))
@@ -1138,7 +1160,7 @@ def handleImage(self):

# Default colorbar ylabel and size
#default_cb_ylabel = r'Sv (dB re 1 $\bf{m^2}$/$\bf{m^3}$)'
default_cb_ylabel = r'$\bf{Sv}$ $\bf{(dB}$ $\bf{re}$ $\bf{1}$ $\bf{m^2}$/$\bf{m^3}$$\bf{)}$'
default_cb_ylabel = r"$\bf{Sv}$ $\bf{(dB}$ $\bf{re}$ $\bf{1}$ $\bf{m^2}$/$\bf{m^3}$$\bf{)}$"
default_cb_shrink = 0.60

# Default plot parameters
@@ -1308,7 +1330,11 @@ def handleImage(self):
# Color bar using vmin,vmax
#cx = fig.colorbar(px, ticks=dB_ticks)
#cx = fig.colorbar(px, shrink=default_cb_shrink)
cx = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), shrink=default_cb_shrink)
cx = fig.colorbar(
mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
ax=plt.gca(),
shrink=default_cb_shrink
)
cx.ax.get_yaxis().labelpad = 15
cx.ax.set_ylabel(default_cb_ylabel)

@@ -1376,6 +1402,7 @@ def handleImage(self):
#cbar = plt.colorbar(orientation='vertical', label=default_cb_ylabel, shrink=default_cb_shrink)
plt.colorbar(
mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
ax=plt.gca(),
orientation='vertical', label=default_cb_ylabel, shrink=default_cb_shrink)
plt.gca().invert_yaxis()
plt.ylabel('depth (m)')
@@ -1429,6 +1456,8 @@ def handleImage(self):
if len(plotDataShape) == 1:
if self.debugFlag:
print("WARNING: Not enough pings to produce binned plot.")

plt.close()
return
#breakpoint()
plt.imshow(plotData, cmap=cmap, interpolation='none')
@@ -1513,6 +1542,7 @@ def handleImage(self):
# Determine if we are writing to stdout
if stdoutFlag:
plt.savefig(sys.stdout.buffer, dpi=100)
plt.close()
else:
# Plot image
if self.args['outDir']:
@@ -1589,8 +1619,8 @@ def createEchogramSpreadsheet(self):
source_depth_data, columns=['m_present_time', 'm_depth'],
ignoreNaNColumns=['m_depth'])
if barData is not None:
#self.stopToDebug()
barData = np.append(barData, depthData, axis=0)
if depthData is not None:
barData = np.append(barData, depthData, axis=0)
else:
barData = depthData
#self.stopToDebug()
@@ -1626,8 +1656,10 @@ def createEchogramSpreadsheet(self):
depthBinLength = echogramRange / numberDepthBins

# Determine time range to plot based on depthTimes
if len(depthTimes) == 0:
print("WARNING: No usable depth information found!")
# Requires at least 3 depth coordinates.
if len(depthTimes) < 3:
if self.debugFlag:
print("WARNING: Insufficient depth information found!")
self.data['depthBinLength'] = None
self.data['timeBinLength'] = None
self.data['spreadsheet'] = None
@@ -1739,6 +1771,38 @@

return

def mergeDuplicateValues(self, dbdData):
'''
This walks through a given dbdData array of (time, value) pairs.
When a duplicate time entry is found, non-NaN values from the
duplicate row overwrite the values already stored for that time.
'''

retData = []

for p in dbdData:
t = p[0]
v = p[1]
tt = []
vv = []

for i in range(0, len(t)):
if i == 0:
tt.append(t[i])
vv.append(v[i])
else:
# Duplicate time: a non-NaN value overwrites the stored one
if t[i] == tt[-1]:
if not np.isnan(v[i]):
vv[-1] = v[i]
else:
tt.append(t[i])
vv.append(v[i])

retData.append([np.array(tt), np.array(vv)])

return retData
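
A toy illustration (values invented) of the merge behaviour: the second sample at t == 1.0 carries a real value, so it replaces the NaN recorded for the first.

import numpy as np

dbdData = [
    (np.array([0.0, 1.0, 1.0, 2.0]), np.array([10.0, np.nan, 11.0, 12.0])),
]
# Passing this through mergeDuplicateValues yields one (time, value) pair:
#   times  -> [ 0.,  1.,  2.]
#   values -> [10., 11., 12.]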

def readDbd(self, **kwargs):
'''
This function reads any DBD glider file using the dbdreader python library.
@@ -1793,7 +1857,7 @@ def readDbd(self, **kwargs):
#dbdFp = dbdreader.DBD(inputFile, cacheDir=cacheDir)
dbdFp = None
try:
dbdFp = dbdreader.DBD(inputFile, cacheDir=cacheDir)
dbdFp = dbdreader.DBD(inputFile, cacheDir=cacheDir, skip_initial_line=False)
except Exception:
print("WARNING: Unable to read glider DBD file: %s" % (inputFile))

@@ -1832,6 +1896,14 @@ def readDbd(self, **kwargs):
dbdFp.close()
return

# Check for duplicate rows on the time axis and collapse first
uval, cval = np.unique(dbdData[dbdFp.parameterNames.index(timeDimension)][0], return_counts=True)
dval = uval[cval > 1]
if len(dval) > 0:
if self.debugFlag:
print(f"Merging data for {inputFile}")
dbdData = self.mergeDuplicateValues(dbdData)

timeIdx = dbdFp.parameterNames.index(timeDimension)
timeLen = len(dbdData[timeIdx][1])

@@ -1863,7 +1935,8 @@ def readDbd(self, **kwargs):
tempVar = np.array([np.nan] * timeLen)
tempVar[0:len(dbdData[idx][1])] = dbdData[idx][1]
data = tempVar
print(" Resized:", dbdFp.parameterNames[idx])
if self.debugFlag:
print(" Resized:", dbdFp.parameterNames[idx])
dataObj[p] = (("time"), data)
#self.stopToDebug()
collectedParameters.append(p)
@@ -1875,6 +1948,15 @@ def readDbd(self, **kwargs):
collectedParameters.append(p)
collectedUnits.append(dbdFp.parameterUnits[p])

# Detect duplicate data rows, usually two
# Row 1 information is overwritten by Row 2
#uval, cval = np.unique(dataObj[timeDimension], return_counts=True)
#dval = uval[cval > 1]
#if len(dval) > 0:
# if self.debugFlag:
# print(f"Merging data for {inputFile}")
# dataObj = self.mergeDuplicateValues(dataObj, dval, timeDimension)

# Final assignments into object .data object
self.data[dbdType] = dataObj
self.data['columns'][dbdType] = collectedParameters
Expand Down Expand Up @@ -2649,6 +2731,7 @@ def apply_thresholds(b):

# Sync failed
if not success:
vbsFP.close()
return

#print(tsMetric, data)
Expand Down Expand Up @@ -2731,6 +2814,8 @@ def apply_thresholds(b):
if endOfFile:
break

vbsFP.close()

def applyDeploymentGlobalMetadata(self, ncDS):
'''
This function applies the deployment.json global metadata to the xarray Dataset object.