echotools/GUTILS sync
 * __init__.py
   * SciPy 1.13.0 warnings
 * nc.py
   * Skip writing extras file for echograms that are short
 * slocum/__init__.py
   * enable deeper debugging
 * slocum/bin/convertDbds.sh
    * force grep to treat binary *bd files as ASCII text
 * slocum/echotools/teledyne.py
   * allow PYTHONWARNINGS to override coded warnings
    * utilize dbdreader skip_initial_line feature
   * numpy datetime conversion warning
   * flake8 file matching update
   * ensure file descriptors are released
   * merge duplicate time coordinate data from DBD
   * flake8 updates
jr3cermak committed Jul 10, 2023
1 parent dca53e2 commit cdd8f41
Showing 5 changed files with 132 additions and 27 deletions.
7 changes: 6 additions & 1 deletion gutils/__init__.py
@@ -12,7 +12,12 @@

import numpy as np
import pandas as pd
from scipy.signal import boxcar, convolve
try:
# SciPy 1.13.0 DeprecationWarning
from scipy.signal.windows import boxcar
from scipy.signal import convolve
except ImportError:
from scipy.signal import boxcar, convolve

from pocean.meta import MetaInterface
from pocean.utils import (
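Not part of the commit, but a minimal sketch of why the fallback matters: the same boxcar smoothing call works with both the new (SciPy >= 1.13) and the old import location. The helper name and window size below are illustrative.

import numpy as np

try:
    # SciPy 1.13+ location of the window functions
    from scipy.signal.windows import boxcar
    from scipy.signal import convolve
except ImportError:
    # Older SciPy still exposes boxcar directly under scipy.signal
    from scipy.signal import boxcar, convolve

def boxcar_smooth(series, size=5):
    # Normalized moving-average (boxcar) smoothing of a 1-D array
    window = boxcar(size)
    return convolve(series, window / window.sum(), mode='same')

print(boxcar_smooth(np.arange(10.0)))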
18 changes: 15 additions & 3 deletions gutils/nc.py
@@ -396,10 +396,13 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes

if not extras_df.empty:

# Skip extra flag
skip_extra = False

# Write the extras dimension to a new profile file
profile_extras = extras_df.loc[extras_df.profile == pi].copy()
if profile_extras.empty:
continue
skip_extra = True

# Standardize the columns of the "extras" from the matched profile
profile_extras.loc[:, 't'] = profile_extras.index
@@ -420,9 +423,18 @@ def create_netcdf(attrs, data, output_path, mode, profile_id_type=ProfileIdTypes
profile_extras.loc[:, c] = profile_extras[c].astype(profile[c].dtype)
"""

# If there are fewer than 3 time coordinates with an echogram, do not
# write out the profile.
if 'echogram_sv' in profile_extras.columns:
if 't' not in profile_extras.columns:
skip_extra = True
if len(pd.unique(profile_extras['t'])) < 3:
skip_extra = True

try:
cr = create_profile_netcdf(attrs, profile_extras, output_path, mode + '_extra', profile_id_type)
written_files.append(cr)
if not skip_extra:
cr = create_profile_netcdf(attrs, profile_extras, output_path, mode + '_extra', profile_id_type)
written_files.append(cr)
except BaseException:
L.exception('Error creating extra netCDF profile {}. Skipping.'.format(pi))
continue
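Restating the new guard as a standalone helper (a sketch only; the module keeps the logic inline as shown above): an extras profile is skipped when it is empty, or when it carries an echogram with fewer than three unique time coordinates.

import pandas as pd

def should_skip_extras(profile_extras: pd.DataFrame) -> bool:
    # Mirror of the inline skip_extra logic above
    if profile_extras.empty:
        return True
    if 'echogram_sv' in profile_extras.columns:
        if 't' not in profile_extras.columns:
            return True
        if len(pd.unique(profile_extras['t'])) < 3:
            return True
    return False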
7 changes: 5 additions & 2 deletions gutils/slocum/__init__.py
@@ -672,6 +672,9 @@ def slocum_binary_sorter(x):
# of a reader.
self.extra_kwargs = self.attrs.pop('extra_kwargs', {})

if self.extra_kwargs.get('enable_debug', False):
L.setLevel(logging.DEBUG)

def __del__(self):
# Remove tmpdir
shutil.rmtree(self.tmpdir, ignore_errors=True)
@@ -868,7 +871,7 @@ def convert(self):
# upstream expects *.dat files, so produce
# a *.pq file and link it to a *.dat file
try:
os.symlink(pqFile, datFile)
os.symlink(os.path.basename(pqFile), datFile)
except FileExistsError:
L.error(f"Symlink already exists for {datFile}")
else:
@@ -888,7 +891,7 @@ def convert(self):
echograms_attrs = self.extra_kwargs.get('echograms', {})
enable_ascii = echograms_attrs.get('enable_ascii', False)
enable_image = echograms_attrs.get('enable_image', False)
enable_debug = echograms_attrs.get('enable_debug', False)
enable_debug = self.extra_kwargs.get('enable_debug', False)

if enable_debug:
pargs.remove('-q')
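A short sketch (hypothetical paths) of what the symlink change buys: linking by basename records a relative target, so the *.dat alias still resolves if the processing directory is later moved or mounted under a different path, whereas an absolute target would dangle.

import os

pqFile = "/tmp/gutils_proc/unit_123-2023-100-0-0.pq"    # hypothetical
datFile = "/tmp/gutils_proc/unit_123-2023-100-0-0.dat"  # hypothetical

try:
    # Target stored as "unit_123-2023-100-0-0.pq", relative to the link's directory
    os.symlink(os.path.basename(pqFile), datFile)
except FileExistsError:
    pass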
4 changes: 2 additions & 2 deletions gutils/slocum/bin/convertDbds.sh
@@ -425,7 +425,7 @@ do

# dbdSource must have the ascii header line dbd_label: to be a valid *bd
# file
is_dbd=$(grep 'dbd_label:' $dbdSource);
is_dbd=$(grep -a 'dbd_label:' $dbdSource);
if [ -z "$is_dbd" ]
then
echo "Invalid flight source file: $dbdSource" >&2;
@@ -471,7 +471,7 @@ do

# dbdSource must have the ascii header line dbd_label: to be a valid *bd
# file
is_dbd=$(grep 'dbd_label:' $sciSource);
is_dbd=$(grep -a 'dbd_label:' $sciSource);
if [ -z "$is_dbd" ]
then
echo "Invalid science source file: $sciSource" >&2;
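The grep -a change keeps grep from treating the mixed binary/ASCII *bd files as binary and suppressing the matched line. A rough Python equivalent of the header test (an assumption about file layout: the dbd_label: header appears near the start of the file):

def looks_like_dbd(path: str) -> bool:
    # Read the first few KB in binary mode and look for the ASCII header marker
    with open(path, "rb") as fp:
        return b"dbd_label:" in fp.read(4096)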
123 changes: 104 additions & 19 deletions gutils/slocum/echotools/teledyne.py
@@ -8,6 +8,7 @@
import sys
import struct
import subprocess
import warnings
import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
@@ -23,6 +24,21 @@
logging.getLogger("dbdreader").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING)

# Default: let a user-supplied PYTHONWARNINGS setting take precedence over the coded filter
if os.environ.get('PYTHONWARNINGS', None):
pass
else:
warnings.simplefilter("default")

# Development
#warnings.resetwarnings()
#warnings.filterwarnings("error")

# netcdf4 (obscure error)
warnings.filterwarnings("ignore", message='numpy.*size changed.*')
# TODO: Plotting breaks down when there are multiple profiles
warnings.filterwarnings("ignore", message='.*coordinates to pcolormesh.*')
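Restating the intent of the warnings setup above as a sketch: a user-supplied PYTHONWARNINGS value (for example error::DeprecationWarning) is left in force, and the coded "default" filter is only installed when the environment variable is absent.

import os
import warnings

if not os.environ.get('PYTHONWARNINGS'):
    # No user preference: show each unique warning once per location
    warnings.simplefilter("default")
# Otherwise the interpreter already applied the filters from PYTHONWARNINGS
# at startup, and they take precedence.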


class Glider:
'''
@@ -293,7 +309,7 @@ def createFileInventory(self, fileList, cache_dir):
plabel = f"PATH{ct:04d}"
self.data['inventory_paths'][abspath] = plabel

dbdFp = dbdreader.DBD(infile, cacheDir=cache_dir)
dbdFp = dbdreader.DBD(infile, cacheDir=cache_dir, skip_initial_line=False)
dbdData = dbdFp.get(*dbdFp.parameterNames, return_nans=True)
cacheFile = f"{dbdFp.cacheID}.cac"

@@ -365,8 +381,8 @@ def filterFiles(self, start_time=None, end_time=None):
# Convert 0000-00-00 00:00:00 to nan
mask = ds['End'] == '0000-00-00 00:00:00'
ds['End'][mask] = np.nan
ds['Start_dt'] = ds['Start'].astype('datetime64', errors='ignore')
ds['End_dt'] = ds['End'].astype('datetime64', errors='ignore')
ds['Start_dt'] = ds['Start'].astype('datetime64[ns]', errors='ignore')
ds['End_dt'] = ds['End'].astype('datetime64[ns]', errors='ignore')

ds_start_time = pd.to_datetime(start_time).to_datetime64()
ds_end_time = pd.to_datetime(end_time).to_datetime64()
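A toy example of the dtype change (values are illustrative): recent pandas/numpy warn or refuse when asked for a unit-less 'datetime64', so the conversion now names the nanosecond unit explicitly.

import pandas as pd

ds = pd.DataFrame({"Start": ["2023-07-01 12:00:00", "2023-07-02 06:30:00"]})
ds["Start_dt"] = ds["Start"].astype("datetime64[ns]", errors="ignore")
print(ds.dtypes)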
@@ -420,9 +436,9 @@ def filterFiles(self, start_time=None, end_time=None):
# and pull through files that match the base file for
# completeness.
for gkey in groupList.keys():
match = "%s\\." % (os.path.basename(gkey))
ss1 = ds.loc[ds['File'].str.contains(match)]
ss2 = df.loc[df['File'].str.contains(match)]
match = "%s." % (os.path.basename(gkey))
ss1 = ds.loc[ds['File'].str.contains(match, regex=False)]
ss2 = df.loc[df['File'].str.contains(match, regex=False)]
df = pd.concat([df, ss1, ss2]).drop_duplicates()
inv.data['inventory'] = df
fileList = inv.getFullFilenamesFromFileInventory()
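A small sketch (hypothetical file names) of the matching change: with regex=False the trailing dot in the base-file prefix is matched literally, so the "\\."-escaping previously needed for a regex match goes away.

import pandas as pd

df = pd.DataFrame({"File": ["unit_123-2023-100-0-0.sbd",
                            "unit_123-2023-100-0-0.tbd",
                            "unit_123-2023-100-0-10.sbd"]})
match = "unit_123-2023-100-0-0."
subset = df.loc[df["File"].str.contains(match, regex=False)]
# Only the first two rows match; the "-0-10." file is excluded.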
@@ -639,6 +655,7 @@ def loadFileInventory(self, fname):
df = pd.concat([df, pd.Series(rec).to_frame().T], ignore_index=True)

self.data['inventory'] = df
fn.close()

return

@@ -662,7 +679,8 @@ def loadMetadata(self):
# Attempt to read echotools.json configuration file
try:
echotoolsFile = os.path.join(self.args['deploymentDir'], 'echotools.json')
testLoad = json.load(open(echotoolsFile))
with open(echotoolsFile) as fp:
testLoad = json.load(fp)
self.echotools = testLoad
except Exception:
print("WARNING: Unable to parse json echotools file: %s" % (echotoolsFile))
@@ -671,7 +689,8 @@ def loadMetadata(self):
# Attempt to read deployment.json
try:
deploymentFile = os.path.join(self.args['deploymentDir'], 'deployment.json')
testLoad = json.load(open(deploymentFile))
with open(deploymentFile, "r") as fp:
testLoad = json.load(fp)
self.deployment = testLoad
except Exception:
print("ERROR: Unable to parse json deployment file: %s" % (deploymentFile))
@@ -680,7 +699,8 @@ def loadMetadata(self):
# Attempt to read instruments.json
try:
instrumentsFile = os.path.join(self.args['deploymentDir'], 'instruments.json')
testLoad = json.load(open(instrumentsFile))
with open(instrumentsFile, "r") as fp:
testLoad = json.load(fp)
self.instruments = testLoad
except Exception as err:
print(f"ERROR: Unable to parse json instruments file: {instrumentsFile} {err=}")
@@ -699,7 +719,8 @@ def loadMetadata(self):
# Attempt to read <template>.json
try:
templateFile = os.path.join(self.args['templateDir'], self.args['template'])
testLoad = json.load(open(templateFile))
with open(templateFile, "r") as fp:
testLoad = json.load(fp)
self.template = testLoad
except Exception:
print("ERROR: Unable to parse json template file: %s" % (templateFile))
@@ -710,7 +731,8 @@ def loadMetadata(self):
# Attempt to read <dacOverlay>.json
try:
dacOverlayFile = os.path.join(self.args['deploymentDir'], self.args['dacOverlay'])
testLoad = json.load(open(dacOverlayFile))
with open(dacOverlayFile) as fp:
testLoad = json.load(fp)
self.dacOverlay = testLoad
except Exception:
print("ERROR: Unable to parse json DAC overlay metadata file: %s" % (dacOverlayFile))
@@ -1138,7 +1160,7 @@ def handleImage(self):

# Default colorbar ylabel and size
#default_cb_ylabel = r'Sv (dB re 1 $\bf{m^2}$/$\bf{m^3}$)'
default_cb_ylabel = r'$\bf{Sv}$ $\bf{(dB}$ $\bf{re}$ $\bf{1}$ $\bf{m^2}$/$\bf{m^3}$$\bf{)}$'
default_cb_ylabel = r"$\bf{Sv}$ $\bf{(dB}$ $\bf{re}$ $\bf{1}$ $\bf{m^2}$/$\bf{m^3}$$\bf{)}$"
default_cb_shrink = 0.60

# Default plot parameters
@@ -1308,7 +1330,11 @@ def handleImage(self):
# Color bar using vmin,vmax
#cx = fig.colorbar(px, ticks=dB_ticks)
#cx = fig.colorbar(px, shrink=default_cb_shrink)
cx = fig.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=cmap), shrink=default_cb_shrink)
cx = fig.colorbar(
mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
ax=plt.gca(),
shrink=default_cb_shrink
)
cx.ax.get_yaxis().labelpad = 15
cx.ax.set_ylabel(default_cb_ylabel)

@@ -1376,6 +1402,7 @@ def handleImage(self):
#cbar = plt.colorbar(orientation='vertical', label=default_cb_ylabel, shrink=default_cb_shrink)
plt.colorbar(
mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
ax=plt.gca(),
orientation='vertical', label=default_cb_ylabel, shrink=default_cb_shrink)
plt.gca().invert_yaxis()
plt.ylabel('depth (m)')
@@ -1429,6 +1456,8 @@ def handleImage(self):
if len(plotDataShape) == 1:
if self.debugFlag:
print("WARNING: Not enough pings to produce binned plot.")

plt.close()
return
#breakpoint()
plt.imshow(plotData, cmap=cmap, interpolation='none')
@@ -1513,6 +1542,7 @@ def handleImage(self):
# Determine if we are writing to stdout
if stdoutFlag:
plt.savefig(sys.stdout.buffer, dpi=100)
plt.close()
else:
# Plot image
if self.args['outDir']:
@@ -1589,8 +1619,8 @@ def createEchogramSpreadsheet(self):
source_depth_data, columns=['m_present_time', 'm_depth'],
ignoreNaNColumns=['m_depth'])
if barData is not None:
#self.stopToDebug()
barData = np.append(barData, depthData, axis=0)
if depthData is not None:
barData = np.append(barData, depthData, axis=0)
else:
barData = depthData
#self.stopToDebug()
@@ -1626,8 +1656,10 @@ def createEchogramSpreadsheet(self):
depthBinLength = echogramRange / numberDepthBins

# Determine time range to plot based on depthTimes
if len(depthTimes) == 0:
print("WARNING: No usable depth information found!")
# Requires at least 3 depth coordinates.
if len(depthTimes) < 3:
if self.debugFlag:
print("WARNING: Insufficient depth information found!")
self.data['depthBinLength'] = None
self.data['timeBinLength'] = None
self.data['spreadsheet'] = None
@@ -1739,6 +1771,38 @@

return

def mergeDuplicateValues(self, dbdData):
'''
This walks through a given dbdData array of (time, value) pairs.
When a duplicate time entry is found, non-NaN values from the
duplicate row overwrite the values already stored for that time.
'''

retData = []

for p in dbdData:
t = p[0]
v = p[1]
tt = []
vv = []

for i in range(0, len(t)):
if i == 0:
tt.append(t[i])
vv.append(v[i])
else:
# Duplicate time: a non-NaN value overwrites the stored one
if t[i] == tt[-1]:
if not np.isnan(v[i]):
vv[-1] = v[i]
else:
tt.append(t[i])
vv.append(v[i])

retData.append([np.array(tt), np.array(vv)])

return retData
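
A toy illustration (values invented) of the merge behaviour: the second sample at t == 1.0 carries a real value, so it replaces the NaN recorded for the first.

import numpy as np

dbdData = [
    (np.array([0.0, 1.0, 1.0, 2.0]), np.array([10.0, np.nan, 11.0, 12.0])),
]
# Passing this through mergeDuplicateValues yields one (time, value) pair:
#   times  -> [ 0.,  1.,  2.]
#   values -> [10., 11., 12.]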

def readDbd(self, **kwargs):
'''
This function reads any DBD glider file using the dbdreader python library.
@@ -1793,7 +1857,7 @@ def readDbd(self, **kwargs):
#dbdFp = dbdreader.DBD(inputFile, cacheDir=cacheDir)
dbdFp = None
try:
dbdFp = dbdreader.DBD(inputFile, cacheDir=cacheDir)
dbdFp = dbdreader.DBD(inputFile, cacheDir=cacheDir, skip_initial_line=False)
except Exception:
print("WARNING: Unable to read glider DBD file: %s" % (inputFile))

@@ -1832,6 +1896,14 @@ def readDbd(self, **kwargs):
dbdFp.close()
return

# Check for duplicate rows on the time axis and collapse first
uval, cval = np.unique(dbdData[dbdFp.parameterNames.index(timeDimension)][0], return_counts=True)
dval = uval[cval > 1]
if len(dval) > 0:
if self.debugFlag:
print(f"Merging data for {inputFile}")
dbdData = self.mergeDuplicateValues(dbdData)

timeIdx = dbdFp.parameterNames.index(timeDimension)
timeLen = len(dbdData[timeIdx][1])

@@ -1863,7 +1935,8 @@ def readDbd(self, **kwargs):
tempVar = np.array([np.nan] * timeLen)
tempVar[0:len(dbdData[idx][1])] = dbdData[idx][1]
data = tempVar
print(" Resized:", dbdFp.parameterNames[idx])
if self.debugFlag:
print(" Resized:", dbdFp.parameterNames[idx])
dataObj[p] = (("time"), data)
#self.stopToDebug()
collectedParameters.append(p)
@@ -1875,6 +1948,15 @@ def readDbd(self, **kwargs):
collectedParameters.append(p)
collectedUnits.append(dbdFp.parameterUnits[p])

# Detect duplicate data rows, usually two
# Row 1 information is overwritten by Row 2
#uval, cval = np.unique(dataObj[timeDimension], return_counts=True)
#dval = uval[cval > 1]
#if len(dval) > 0:
# if self.debugFlag:
# print(f"Merging data for {inputFile}")
# dataObj = self.mergeDuplicateValues(dataObj, dval, timeDimension)

# Final assignments into object .data object
self.data[dbdType] = dataObj
self.data['columns'][dbdType] = collectedParameters
Expand Down Expand Up @@ -2649,6 +2731,7 @@ def apply_thresholds(b):

# Sync failed
if not success:
vbsFP.close()
return

#print(tsMetric, data)
Expand Down Expand Up @@ -2731,6 +2814,8 @@ def apply_thresholds(b):
if endOfFile:
break

vbsFP.close()

def applyDeploymentGlobalMetadata(self, ncDS):
'''
This function applies the deployment.json global metadata to the xarray Dataset object.