diff --git a/notebooks/CODA-database-sandbox.ipynb b/notebooks/CODA-database-sandbox.ipynb index b1a55fc..c546201 100644 --- a/notebooks/CODA-database-sandbox.ipynb +++ b/notebooks/CODA-database-sandbox.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "b8fdfb1c-d68b-4693-938a-028037db64b7", "metadata": {}, "outputs": [ @@ -42,21 +42,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "the platform we are working on is HPC with hostname: gadi-cpu-bdw-0084.gadi.nci.org.au\n" + "the platform we are working on is HPC with hostname: gadi-cpu-bdw-0760.gadi.nci.org.au\n" ] } ], "source": [ "import os\n", "import socket\n", - "\n", + " \n", "def get_platform():\n", " hostname = socket.gethostname()\n", " if \"gadi\" in hostname: # Adjust this condition to fit your HPC's hostname or unique identifier\n", " return 'HPC',hostname\n", " else:\n", " return 'Laptop',hostname\n", - " \n", "[platform,hostname] = get_platform()\n", "print('the platform we are working on is '+platform+' with hostname: '+hostname)" ] @@ -71,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "61c387ac-757c-488b-b34b-50c6b2389578", "metadata": {}, "outputs": [], @@ -83,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "d248ad4e-3a28-4610-b4ef-56191d13da4f", "metadata": {}, "outputs": [ @@ -94,7 +93,7 @@ "
\n", "
\n", "

Client

\n", - "

Client-9854c9ed-73f2-11ef-aa11-000007c5fe80

\n", + "

Client-e020c82f-756e-11ef-adb9-000007b2fe80

\n", " \n", "\n", " \n", @@ -129,7 +128,7 @@ " \n", "
\n", "

LocalCluster

\n", - "

c832da56

\n", + "

3cf8427d

\n", "
\n", " \n", "
\n", @@ -166,11 +165,11 @@ "
\n", "
\n", "

Scheduler

\n", - "

Scheduler-3e9f3fd9-8ca7-44d5-8e8c-cbb85640f6a6

\n", + "

Scheduler-33f0b421-14b7-44db-97f5-ec66fa5f9443

\n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
\n", - " Comm: tcp://127.0.0.1:36139\n", + " Comm: tcp://127.0.0.1:34431\n", " \n", " Workers: 7\n", @@ -212,7 +211,7 @@ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -257,7 +256,7 @@ "
\n", - " Comm: tcp://127.0.0.1:45293\n", + " Comm: tcp://127.0.0.1:33447\n", " \n", " Total threads: 4\n", @@ -220,7 +219,7 @@ "
\n", - " Dashboard: /proxy/40377/status\n", + " Dashboard: /proxy/33871/status\n", " \n", " Memory: 35.88 GiB\n", @@ -228,13 +227,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:39775\n", + " Nanny: tcp://127.0.0.1:36435\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-ni5_ej3q\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-ou1nx43j\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -302,7 +301,7 @@ "
\n", - " Comm: tcp://127.0.0.1:45359\n", + " Comm: tcp://127.0.0.1:43641\n", " \n", " Total threads: 4\n", @@ -265,7 +264,7 @@ "
\n", - " Dashboard: /proxy/39511/status\n", + " Dashboard: /proxy/41611/status\n", " \n", " Memory: 35.88 GiB\n", @@ -273,13 +272,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:37841\n", + " Nanny: tcp://127.0.0.1:39373\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-fiw8xirx\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-qwhl1i4s\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -347,7 +346,7 @@ "
\n", - " Comm: tcp://127.0.0.1:38833\n", + " Comm: tcp://127.0.0.1:34947\n", " \n", " Total threads: 4\n", @@ -310,7 +309,7 @@ "
\n", - " Dashboard: /proxy/37037/status\n", + " Dashboard: /proxy/37605/status\n", " \n", " Memory: 35.88 GiB\n", @@ -318,13 +317,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:33625\n", + " Nanny: tcp://127.0.0.1:38353\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-htii7iy7\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-aovf5c_l\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -392,7 +391,7 @@ "
\n", - " Comm: tcp://127.0.0.1:33277\n", + " Comm: tcp://127.0.0.1:39111\n", " \n", " Total threads: 4\n", @@ -355,7 +354,7 @@ "
\n", - " Dashboard: /proxy/39877/status\n", + " Dashboard: /proxy/34339/status\n", " \n", " Memory: 35.88 GiB\n", @@ -363,13 +362,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:38301\n", + " Nanny: tcp://127.0.0.1:43993\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-9a2favk7\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-rfhvo1b4\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -437,7 +436,7 @@ "
\n", - " Comm: tcp://127.0.0.1:44693\n", + " Comm: tcp://127.0.0.1:38337\n", " \n", " Total threads: 4\n", @@ -400,7 +399,7 @@ "
\n", - " Dashboard: /proxy/36771/status\n", + " Dashboard: /proxy/36309/status\n", " \n", " Memory: 35.88 GiB\n", @@ -408,13 +407,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:39299\n", + " Nanny: tcp://127.0.0.1:40805\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-xhinlsct\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-fnqkd4h0\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -482,7 +481,7 @@ "
\n", - " Comm: tcp://127.0.0.1:46701\n", + " Comm: tcp://127.0.0.1:39843\n", " \n", " Total threads: 4\n", @@ -445,7 +444,7 @@ "
\n", - " Dashboard: /proxy/42775/status\n", + " Dashboard: /proxy/40125/status\n", " \n", " Memory: 35.88 GiB\n", @@ -453,13 +452,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:37993\n", + " Nanny: tcp://127.0.0.1:43003\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-esxbpjcw\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-admp5xxw\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", @@ -531,32 +530,12 @@ "" ], "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-09-16 17:11:36,503 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:33415'.\n", - "2024-09-16 17:11:36,505 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:38833'.\n", - "2024-09-16 17:11:36,506 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:45359'.\n", - "2024-09-16 17:11:36,508 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:33277'.\n", - "2024-09-16 17:11:36,509 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:46701'.\n", - "2024-09-16 17:11:36,512 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:45293'.\n", - "2024-09-16 17:11:36,514 - distributed.scheduler - WARNING - Received heartbeat from unregistered worker 'tcp://127.0.0.1:44693'.\n", - "2024-09-16 17:11:37,760 - distributed.nanny - WARNING - Restarting worker\n", - "2024-09-16 17:11:37,826 - distributed.nanny - WARNING - Restarting worker\n", - "2024-09-16 17:11:37,830 - distributed.nanny - WARNING - Restarting worker\n", - "2024-09-16 17:11:37,837 - distributed.nanny - WARNING - Restarting worker\n", - "2024-09-16 17:11:37,846 - distributed.nanny - WARNING - Restarting worker\n", - "2024-09-16 17:11:37,854 - distributed.nanny - WARNING - Restarting worker\n", - "2024-09-16 17:11:37,861 - distributed.nanny - WARNING - Restarting worker\n" - ] } ], "source": [ @@ -594,7 +573,31 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, + "id": "f7670731-edad-47aa-8cb9-916f184b229c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14416ea0-4723-4480-9aa2-d966069134f3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "622648ff-84c7-4d52-b223-144486a91aef", + "metadata": {}, + "source": [ + "# build PQ & CSV versions from NetCDF" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "4aee8e81-0b29-4949-9849-855f253de129", "metadata": {}, "outputs": [], @@ -604,5162 +607,100 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "b51d9673-5b4c-4763-b1e8-315e2f7a9204", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 5GB\n",
-       "Dimensions:                 (cast: 64100, z_index: 1005)\n",
-       "Coordinates:\n",
-       "  * cast                    (cast) int64 513kB 1 2 3 4 ... 64098 64099 64100\n",
-       "    time                    (cast) datetime64[ns] 513kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    lat                     (cast) float32 256kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    lon                     (cast) float32 256kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "  * z_index                 (z_index) int64 8kB 0 1 2 3 ... 1001 1002 1003 1004\n",
-       "    z                       (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
-       "Data variables: (12/29)\n",
-       "    WOD_id                  (cast) int32 256kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    origflagset             (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    country                 (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    dataset                 (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    Access_no               (cast) float64 513kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    Recorder                (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "    ...                      ...\n",
-       "    Chlorophyll_WODflag     (cast, z_index) float64 515MB dask.array<chunksize=(32766, 512), meta=np.ndarray>\n",
-       "    Chlorophyll_origflag    (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
-       "    z_WODflag               (cast, z_index) float64 515MB dask.array<chunksize=(32766, 512), meta=np.ndarray>\n",
-       "    z_origflag              (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
-       "    crs                     int32 4B ...\n",
-       "    CODA_id                 (cast) |S20 1MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
-       "Attributes:\n",
-       "    Parent ragged array file:  wod_pfl_2005.nc
\n", - " Comm: tcp://127.0.0.1:33415\n", + " Comm: tcp://127.0.0.1:44797\n", " \n", " Total threads: 4\n", @@ -490,7 +489,7 @@ "
\n", - " Dashboard: /proxy/37011/status\n", + " Dashboard: /proxy/38231/status\n", " \n", " Memory: 35.88 GiB\n", @@ -498,13 +497,13 @@ "
\n", - " Nanny: tcp://127.0.0.1:38091\n", + " Nanny: tcp://127.0.0.1:35455\n", "
\n", - " Local directory: /jobfs/124768528.gadi-pbs/dask-scratch-space/worker-w9gtc9ig\n", + " Local directory: /jobfs/124913893.gadi-pbs/dask-scratch-space/worker-4ng3uasu\n", "
\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Array Chunk
Bytes 500.78 kiB 500.78 kiB
Shape (64100,) (64100,)
Dask graph 1 chunks in 2 graph layers
Data type datetime64[ns] numpy.ndarray
\n", - "
\n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
  • lat
    (cast)
    float32
    dask.array<chunksize=(64100,), meta=np.ndarray>
    standard_name :
    latitude
    long_name :
    latitude
    units :
    degrees_north
    \n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    Array Chunk
    Bytes 250.39 kiB 250.39 kiB
    Shape (64100,) (64100,)
    Dask graph 1 chunks in 2 graph layers
    Data type float32 numpy.ndarray
    \n", - "
    \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
  • lon
    (cast)
    float32
    dask.array<chunksize=(64100,), meta=np.ndarray>
    standard_name :
    longitude
    long_name :
    longitude
    units :
    degrees_east
    \n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    Array Chunk
    Bytes 250.39 kiB 250.39 kiB
    Shape (64100,) (64100,)
    Dask graph 1 chunks in 2 graph layers
    Data type float32 numpy.ndarray
    \n", - "
    \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
  • z_index
    (z_index)
    int64
    0 1 2 3 4 ... 1001 1002 1003 1004
    array([   0,    1,    2, ..., 1002, 1003, 1004])
  • z
    (cast, z_index)
    float32
    dask.array<chunksize=(46280, 725), meta=np.ndarray>
    standard_name :
    depth
    long_name :
    depth_below_sea_surface
    units :
    m
    positive :
    down
    ancillary_variables :
    z_sigfigs z_WODflag z_origflag
    \n", - " \n", - " \n", - " \n", - " \n", - "
    \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    Array Chunk
    Bytes 245.74 MiB 127.99 MiB
    Shape (64100, 1005) (46280, 725)
    Dask graph 4 chunks in 2 graph layers
    Data type float32 numpy.ndarray
    \n", - "
    \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • WOD_id
      (cast)
      int32
      dask.array<chunksize=(64100,), meta=np.ndarray>
      cf_role :
      profile_id
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 250.39 kiB 250.39 kiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type int32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • origflagset
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      comment :
      set of originators flag codes to use
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • country
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • dataset
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      WOD_dataset
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Access_no
      (cast)
      float64
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      NODC_accession_number
      units_wod :
      NODC_code
      comment :
      used to find original data at NODC
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 500.78 kiB 500.78 kiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type float64 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Recorder
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      Recorder
      units_wod :
      WMO code 4770
      comment :
      Device which recorded measurements
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • dbase_orig
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      database_origin
      comment :
      Database from which data were extracted
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Platform
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      Platform_name
      comment :
      name of platform from which measurements were taken
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Project
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      Project_name
      comment :
      name or acronym of project under which data were measured
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • WOD_cruise_identifier
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      comment :
      two byte country code + WOD cruise number (unique to country code)
      long_name :
      WOD_cruise_identifier
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Institute
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      Responsible_institute
      comment :
      name of institute which collected data
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Ocean_Vehicle
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      comment :
      Ocean_vehicle
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Temperature_Instrument
      (cast)
      |S100
      dask.array<chunksize=(64100,), meta=np.ndarray>
      long_name :
      Instrument
      comment :
      Device used for measurement
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 6.11 MiB 6.11 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S100 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • Oxygen
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      long_name :
      volume_fraction_of_oxygen_in_sea_water
      standard_name :
      volume_fraction_of_oxygen_in_sea_water
      units :
      umol/kg
      grid_mapping :
      crs
      ancillary_variables :
      Oxygen_sigfigs Oxygen_WODflag Oxygen_WODprofileflag Oxygen_origflag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Oxygen_WODflag
      (cast, z_index)
      float64
      dask.array<chunksize=(32766, 512), meta=np.ndarray>
      flag_meanings :
      accepted range_out inversion gradient anomaly gradient+inversion range+inversion range+gradient range+anomaly range+inversion+gradient
      standard_name :
      volume_fraction_of_oxygen_in_sea_water status_flag
      long_name :
      WOD_observation_flag
      flag_values :
      [0 1 2 3 4 5 6 7 8 9]
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 491.49 MiB 127.99 MiB
      Shape (64100, 1005) (32766, 512)
      Dask graph 4 chunks in 2 graph layers
      Data type float64 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Oxygen_origflag
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      flag_definitions :
      flag definitions dependent on origflagset
      standard_name :
      volume_fraction_of_oxygen_in_sea_water status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Temperature
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      long_name :
      sea_water_temperature
      standard_name :
      sea_water_temperature
      units :
      degree_C
      grid_mapping :
      crs
      ancillary_variables :
      Temperature_sigfigs Temperature_WODflag Temperature_WODprofileflag Temperature_origflag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Temperature_WODflag
      (cast, z_index)
      float64
      dask.array<chunksize=(32766, 512), meta=np.ndarray>
      long_name :
      WOD_observation_flag
      flag_values :
      [0 1 2 3 4 5 6 7 8 9]
      flag_meanings :
      accepted range_out inversion gradient anomaly gradient+inversion range+inversion range+gradient range+anomaly range+inversion+gradient
      standard_name :
      sea_water_temperature status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 491.49 MiB 127.99 MiB
      Shape (64100, 1005) (32766, 512)
      Dask graph 4 chunks in 2 graph layers
      Data type float64 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Temperature_origflag
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      flag_definitions :
      flag definitions dependent on origflagset
      standard_name :
      sea_water_temperature status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Salinity
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      long_name :
      sea_water_salinity
      standard_name :
      sea_water_salinity
      grid_mapping :
      crs
      ancillary_variables :
      Salinity_sigfigs Salinity_WODflag Salinity_WODprofileflag Salinity_origflag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Salinity_WODflag
      (cast, z_index)
      float64
      dask.array<chunksize=(32766, 512), meta=np.ndarray>
      long_name :
      WOD_observation_flag
      flag_values :
      [0 1 2 3 4 5 6 7 8 9]
      flag_meanings :
      accepted range_out inversion gradient anomaly gradient+inversion range+inversion range+gradient range+anomaly range+inversion+gradient
      standard_name :
      sea_water_salinity status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 491.49 MiB 127.99 MiB
      Shape (64100, 1005) (32766, 512)
      Dask graph 4 chunks in 2 graph layers
      Data type float64 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Salinity_origflag
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      flag_definitions :
      flag definitions dependent on origflagset
      standard_name :
      sea_water_salinity status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Chlorophyll
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      long_name :
      mass_concentration_of_chlorophyll_in_sea_water
      standard_name :
      mass_concentration_of_chlorophyll_in_sea_water
      units :
      ugram/l
      grid_mapping :
      crs
      ancillary_variables :
      Chlorophyll_sigfigs Chlorophyll_WODflag Chlorophyll_WODprofileflag Chlorophyll_origflag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Chlorophyll_WODflag
      (cast, z_index)
      float64
      dask.array<chunksize=(32766, 512), meta=np.ndarray>
      long_name :
      WOD_observation_flag
      flag_values :
      [0 1 2 3 4 5 6 7 8 9]
      flag_meanings :
      accepted range_out inversion gradient anomaly gradient+inversion range+inversion range+gradient range+anomaly range+inversion+gradient
      standard_name :
      mass_concentration_of_chlorophyll_in_sea_water status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 491.49 MiB 127.99 MiB
      Shape (64100, 1005) (32766, 512)
      Dask graph 4 chunks in 2 graph layers
      Data type float64 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • Chlorophyll_origflag
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      flag_definitions :
      flag definitions dependent on origflagset
      standard_name :
      mass_concentration_of_chlorophyll_in_sea_water status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • z_WODflag
      (cast, z_index)
      float64
      dask.array<chunksize=(32766, 512), meta=np.ndarray>
      long_name :
      WOD_depth_level_flag
      flag_values :
      [0 1 2]
      flag_meanings :
      accepted duplicate_or_inversion density_inversion
      standard_name :
      depth status_flag
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 491.49 MiB 127.99 MiB
      Shape (64100, 1005) (32766, 512)
      Dask graph 4 chunks in 2 graph layers
      Data type float64 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • z_origflag
      (cast, z_index)
      float32
      dask.array<chunksize=(46280, 725), meta=np.ndarray>
      standard_name :
      depth status_flag
      comment :
      Originator flags are dependent on origflagset
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 245.74 MiB 127.99 MiB
      Shape (64100, 1005) (46280, 725)
      Dask graph 4 chunks in 2 graph layers
      Data type float32 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 1005\n", - " 64100\n", - "\n", - "
    • crs
      ()
      int32
      ...
      grid_mapping_name :
      latitude_longitude
      epsg_code :
      EPSG:4326
      longitude_of_prime_meridian :
      0.0
      semi_major_axis :
      6378137.0
      inverse_flattening :
      298.25723
      [1 values with dtype=int32]
    • CODA_id
      (cast)
      |S20
      dask.array<chunksize=(64100,), meta=np.ndarray>
      Comment :
      Unique CODA identifier with format <Source Dataset><Obs Platform><Counter for date><YYYYMMDD>
      \n", - " \n", - " \n", - " \n", - " \n", - "
      \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      Array Chunk
      Bytes 1.22 MiB 1.22 MiB
      Shape (64100,) (64100,)
      Dask graph 1 chunks in 2 graph layers
      Data type |S20 numpy.ndarray
      \n", - "
      \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 64100\n", - " 1\n", - "\n", - "
    • cast
      PandasIndex
      PandasIndex(Index([    1,     2,     3,     4,     5,     6,     7,     8,     9,    10,\n",
      -       "       ...\n",
      -       "       64091, 64092, 64093, 64094, 64095, 64096, 64097, 64098, 64099, 64100],\n",
      -       "      dtype='int64', name='cast', length=64100))
    • z_index
      PandasIndex
      PandasIndex(Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,\n",
      -       "       ...\n",
      -       "        995,  996,  997,  998,  999, 1000, 1001, 1002, 1003, 1004],\n",
      -       "      dtype='int64', name='z_index', length=1005))
  • Parent ragged array file :
    wod_pfl_2005.nc
  • " - ], - "text/plain": [ - " Size: 5GB\n", - "Dimensions: (cast: 64100, z_index: 1005)\n", - "Coordinates:\n", - " * cast (cast) int64 513kB 1 2 3 4 ... 64098 64099 64100\n", - " time (cast) datetime64[ns] 513kB dask.array\n", - " lat (cast) float32 256kB dask.array\n", - " lon (cast) float32 256kB dask.array\n", - " * z_index (z_index) int64 8kB 0 1 2 3 ... 1001 1002 1003 1004\n", - " z (cast, z_index) float32 258MB dask.array\n", - "Data variables: (12/29)\n", - " WOD_id (cast) int32 256kB dask.array\n", - " origflagset (cast) |S100 6MB dask.array\n", - " country (cast) |S100 6MB dask.array\n", - " dataset (cast) |S100 6MB dask.array\n", - " Access_no (cast) float64 513kB dask.array\n", - " Recorder (cast) |S100 6MB dask.array\n", - " ... ...\n", - " Chlorophyll_WODflag (cast, z_index) float64 515MB dask.array\n", - " Chlorophyll_origflag (cast, z_index) float32 258MB dask.array\n", - " z_WODflag (cast, z_index) float64 515MB dask.array\n", - " z_origflag (cast, z_index) float32 258MB dask.array\n", - " crs int32 4B ...\n", - " CODA_id (cast) |S20 1MB dask.array\n", - "Attributes:\n", - " Parent ragged array file: wod_pfl_2005.nc" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "ea1b275b-38a5-4ea8-956f-c95945ef690d", - "metadata": {}, - "outputs": [], - "source": [ - "ds = ds.set_coords(['WOD_id',\n", - " 'origflagset',\n", - " 'country',\n", - " 'dataset',\n", - " 'Access_no',\n", - " 'dbase_orig',\n", - " 'Project',\n", - " 'WOD_cruise_identifier',\n", - " 'Institute',\n", - " 'Ocean_Vehicle',\n", - " 'Temperature_Instrument',\n", - " 'CODA_id'])" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "31a7877e-62bf-4a99-88bd-b34f864d0e01", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
    <xarray.Dataset> Size: 5GB\n",
    -       "Dimensions:                 (cast: 64100, z_index: 1005)\n",
    -       "Coordinates: (12/18)\n",
    -       "  * cast                    (cast) int64 513kB 1 2 3 4 ... 64098 64099 64100\n",
    -       "    WOD_id                  (cast) int32 256kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    time                    (cast) datetime64[ns] 513kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    lat                     (cast) float32 256kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    lon                     (cast) float32 256kB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    origflagset             (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    ...                      ...\n",
    -       "    Institute               (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    Ocean_Vehicle           (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    Temperature_Instrument  (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "  * z_index                 (z_index) int64 8kB 0 1 2 3 ... 1001 1002 1003 1004\n",
    -       "    z                       (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    CODA_id                 (cast) |S20 1MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "Data variables: (12/17)\n",
    -       "    Recorder                (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    Platform                (cast) |S100 6MB dask.array<chunksize=(64100,), meta=np.ndarray>\n",
    -       "    Oxygen                  (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    Oxygen_WODflag          (cast, z_index) float64 515MB dask.array<chunksize=(32766, 512), meta=np.ndarray>\n",
    -       "    Oxygen_origflag         (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    Temperature             (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    ...                      ...\n",
    -       "    Chlorophyll             (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    Chlorophyll_WODflag     (cast, z_index) float64 515MB dask.array<chunksize=(32766, 512), meta=np.ndarray>\n",
    -       "    Chlorophyll_origflag    (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    z_WODflag               (cast, z_index) float64 515MB dask.array<chunksize=(32766, 512), meta=np.ndarray>\n",
    -       "    z_origflag              (cast, z_index) float32 258MB dask.array<chunksize=(46280, 725), meta=np.ndarray>\n",
    -       "    crs                     int32 4B ...\n",
    -       "Attributes:\n",
    -       "    Parent ragged array file:  wod_pfl_2005.nc
    " - ], - "text/plain": [ - " Size: 5GB\n", - "Dimensions: (cast: 64100, z_index: 1005)\n", - "Coordinates: (12/18)\n", - " * cast (cast) int64 513kB 1 2 3 4 ... 64098 64099 64100\n", - " WOD_id (cast) int32 256kB dask.array\n", - " time (cast) datetime64[ns] 513kB dask.array\n", - " lat (cast) float32 256kB dask.array\n", - " lon (cast) float32 256kB dask.array\n", - " origflagset (cast) |S100 6MB dask.array\n", - " ... ...\n", - " Institute (cast) |S100 6MB dask.array\n", - " Ocean_Vehicle (cast) |S100 6MB dask.array\n", - " Temperature_Instrument (cast) |S100 6MB dask.array\n", - " * z_index (z_index) int64 8kB 0 1 2 3 ... 1001 1002 1003 1004\n", - " z (cast, z_index) float32 258MB dask.array\n", - " CODA_id (cast) |S20 1MB dask.array\n", - "Data variables: (12/17)\n", - " Recorder (cast) |S100 6MB dask.array\n", - " Platform (cast) |S100 6MB dask.array\n", - " Oxygen (cast, z_index) float32 258MB dask.array\n", - " Oxygen_WODflag (cast, z_index) float64 515MB dask.array\n", - " Oxygen_origflag (cast, z_index) float32 258MB dask.array\n", - " Temperature (cast, z_index) float32 258MB dask.array\n", - " ... ...\n", - " Chlorophyll (cast, z_index) float32 258MB dask.array\n", - " Chlorophyll_WODflag (cast, z_index) float64 515MB dask.array\n", - " Chlorophyll_origflag (cast, z_index) float32 258MB dask.array\n", - " z_WODflag (cast, z_index) float64 515MB dask.array\n", - " z_origflag (cast, z_index) float32 258MB dask.array\n", - " crs int32 4B ...\n", - "Attributes:\n", - " Parent ragged array file: wod_pfl_2005.nc" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds" - ] - }, - { - "cell_type": "markdown", - "id": "879327f9-4242-45fa-bcf5-760b36a4483b", - "metadata": {}, - "source": [ - "# make truncated version of ds" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "0e1b6158-80b6-4475-a2bb-4d2c2ebc4ec6", - "metadata": {}, - "outputs": [], - "source": [ - "ds_truncated = ds#.isel(cast=slice(0,10000))" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "9be97945-cd32-4b18-8a05-09143517a5c5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1min 48s, sys: 52.2 s, total: 2min 41s\n", - "Wall time: 2min 43s\n" - ] - }, - { - "data": { - "text/html": [ - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    castz_indexTemperatureWOD_idtimelatlonorigflagsetcountrydatasetAccess_nodbase_origProjectWOD_cruise_identifierInstituteOcean_VehicleTemperature_InstrumentzCODA_id
    01027.816999104055612005-01-01 00:42:11.160091648-13.64100069.829002b'ARGO profiling floats'b'JAPAN'b'profiling float'42682.0b'US GODAE server (Argo)'b'J-ARGO (JAPAN ARGO)'b'JP031068'b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'4.473917b'WODpfl200501010001'
    11127.745001104055612005-01-01 00:42:11.160091648-13.64100069.829002b'ARGO profiling floats'b'JAPAN'b'profiling float'42682.0b'US GODAE server (Argo)'b'J-ARGO (JAPAN ARGO)'b'JP031068'b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'9.444819b'WODpfl200501010001'
    21227.408001104055612005-01-01 00:42:11.160091648-13.64100069.829002b'ARGO profiling floats'b'JAPAN'b'profiling float'42682.0b'US GODAE server (Argo)'b'J-ARGO (JAPAN ARGO)'b'JP031068'b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'13.918531b'WODpfl200501010001'
    31327.285999104055612005-01-01 00:42:11.160091648-13.64100069.829002b'ARGO profiling floats'b'JAPAN'b'profiling float'42682.0b'US GODAE server (Argo)'b'J-ARGO (JAPAN ARGO)'b'JP031068'b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'18.889204b'WODpfl200501010001'
    41427.233000104055612005-01-01 00:42:11.160091648-13.64100069.829002b'ARGO profiling floats'b'JAPAN'b'profiling float'42682.0b'US GODAE server (Argo)'b'J-ARGO (JAPAN ARGO)'b'JP031068'b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'24.257399b'WODpfl200501010001'
    ............................................................
    64420495641001000NaN118987752005-12-31 23:00:57.24563558439.419998132.580002b'ARGO profiling floats'b'KOREA; REPUBLIC OF'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'KR009692'b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'NaNb'WODpfl200512310207'
    64420496641001001NaN118987752005-12-31 23:00:57.24563558439.419998132.580002b'ARGO profiling floats'b'KOREA; REPUBLIC OF'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'KR009692'b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'NaNb'WODpfl200512310207'
    64420497641001002NaN118987752005-12-31 23:00:57.24563558439.419998132.580002b'ARGO profiling floats'b'KOREA; REPUBLIC OF'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'KR009692'b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'NaNb'WODpfl200512310207'
    64420498641001003NaN118987752005-12-31 23:00:57.24563558439.419998132.580002b'ARGO profiling floats'b'KOREA; REPUBLIC OF'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'KR009692'b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'NaNb'WODpfl200512310207'
    64420499641001004NaN118987752005-12-31 23:00:57.24563558439.419998132.580002b'ARGO profiling floats'b'KOREA; REPUBLIC OF'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'KR009692'b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT...b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'NaNb'WODpfl200512310207'
    \n", - "

    64420500 rows × 19 columns

    \n", - "
    " - ], - "text/plain": [ - " cast z_index Temperature WOD_id time \\\n", - "0 1 0 27.816999 10405561 2005-01-01 00:42:11.160091648 \n", - "1 1 1 27.745001 10405561 2005-01-01 00:42:11.160091648 \n", - "2 1 2 27.408001 10405561 2005-01-01 00:42:11.160091648 \n", - "3 1 3 27.285999 10405561 2005-01-01 00:42:11.160091648 \n", - "4 1 4 27.233000 10405561 2005-01-01 00:42:11.160091648 \n", - "... ... ... ... ... ... \n", - "64420495 64100 1000 NaN 11898775 2005-12-31 23:00:57.245635584 \n", - "64420496 64100 1001 NaN 11898775 2005-12-31 23:00:57.245635584 \n", - "64420497 64100 1002 NaN 11898775 2005-12-31 23:00:57.245635584 \n", - "64420498 64100 1003 NaN 11898775 2005-12-31 23:00:57.245635584 \n", - "64420499 64100 1004 NaN 11898775 2005-12-31 23:00:57.245635584 \n", - "\n", - " lat lon origflagset \\\n", - "0 -13.641000 69.829002 b'ARGO profiling floats' \n", - "1 -13.641000 69.829002 b'ARGO profiling floats' \n", - "2 -13.641000 69.829002 b'ARGO profiling floats' \n", - "3 -13.641000 69.829002 b'ARGO profiling floats' \n", - "4 -13.641000 69.829002 b'ARGO profiling floats' \n", - "... ... ... ... \n", - "64420495 39.419998 132.580002 b'ARGO profiling floats' \n", - "64420496 39.419998 132.580002 b'ARGO profiling floats' \n", - "64420497 39.419998 132.580002 b'ARGO profiling floats' \n", - "64420498 39.419998 132.580002 b'ARGO profiling floats' \n", - "64420499 39.419998 132.580002 b'ARGO profiling floats' \n", - "\n", - " country dataset Access_no \\\n", - "0 b'JAPAN' b'profiling float' 42682.0 \n", - "1 b'JAPAN' b'profiling float' 42682.0 \n", - "2 b'JAPAN' b'profiling float' 42682.0 \n", - "3 b'JAPAN' b'profiling float' 42682.0 \n", - "4 b'JAPAN' b'profiling float' 42682.0 \n", - "... ... ... ... \n", - "64420495 b'KOREA; REPUBLIC OF' b'profiling float' 42682.0 \n", - "64420496 b'KOREA; REPUBLIC OF' b'profiling float' 42682.0 \n", - "64420497 b'KOREA; REPUBLIC OF' b'profiling float' 42682.0 \n", - "64420498 b'KOREA; REPUBLIC OF' b'profiling float' 42682.0 \n", - "64420499 b'KOREA; REPUBLIC OF' b'profiling float' 42682.0 \n", - "\n", - " dbase_orig Project \\\n", - "0 b'US GODAE server (Argo)' b'J-ARGO (JAPAN ARGO)' \n", - "1 b'US GODAE server (Argo)' b'J-ARGO (JAPAN ARGO)' \n", - "2 b'US GODAE server (Argo)' b'J-ARGO (JAPAN ARGO)' \n", - "3 b'US GODAE server (Argo)' b'J-ARGO (JAPAN ARGO)' \n", - "4 b'US GODAE server (Argo)' b'J-ARGO (JAPAN ARGO)' \n", - "... ... ... \n", - "64420495 b'US GODAE server (Argo)' b'' \n", - "64420496 b'US GODAE server (Argo)' b'' \n", - "64420497 b'US GODAE server (Argo)' b'' \n", - "64420498 b'US GODAE server (Argo)' b'' \n", - "64420499 b'US GODAE server (Argo)' b'' \n", - "\n", - " WOD_cruise_identifier \\\n", - "0 b'JP031068' \n", - "1 b'JP031068' \n", - "2 b'JP031068' \n", - "3 b'JP031068' \n", - "4 b'JP031068' \n", - "... ... \n", - "64420495 b'KR009692' \n", - "64420496 b'KR009692' \n", - "64420497 b'KR009692' \n", - "64420498 b'KR009692' \n", - "64420499 b'KR009692' \n", - "\n", - " Institute \\\n", - "0 b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE... \n", - "1 b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE... \n", - "2 b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE... \n", - "3 b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE... \n", - "4 b'JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TE... \n", - "... ... \n", - "64420495 b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT... \n", - "64420496 b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT... \n", - "64420497 b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT... \n", - "64420498 b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT... \n", - "64420499 b'KOREAN OCEAN RESEARCH AND DEVELOPMENT INSTIT... \n", - "\n", - " Ocean_Vehicle \\\n", - "0 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "1 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "2 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "3 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "4 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "... ... \n", - "64420495 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "64420496 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "64420497 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "64420498 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "64420499 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", - "\n", - " Temperature_Instrument z CODA_id \n", - "0 b'CTD: TYPE UNKNOWN' 4.473917 b'WODpfl200501010001' \n", - "1 b'CTD: TYPE UNKNOWN' 9.444819 b'WODpfl200501010001' \n", - "2 b'CTD: TYPE UNKNOWN' 13.918531 b'WODpfl200501010001' \n", - "3 b'CTD: TYPE UNKNOWN' 18.889204 b'WODpfl200501010001' \n", - "4 b'CTD: TYPE UNKNOWN' 24.257399 b'WODpfl200501010001' \n", - "... ... ... ... \n", - "64420495 b'CTD: TYPE UNKNOWN' NaN b'WODpfl200512310207' \n", - "64420496 b'CTD: TYPE UNKNOWN' NaN b'WODpfl200512310207' \n", - "64420497 b'CTD: TYPE UNKNOWN' NaN b'WODpfl200512310207' \n", - "64420498 b'CTD: TYPE UNKNOWN' NaN b'WODpfl200512310207' \n", - "64420499 b'CTD: TYPE UNKNOWN' NaN b'WODpfl200512310207' \n", - "\n", - "[64420500 rows x 19 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "%%time\n", - "df = ds_truncated[['Temperature']].to_dataframe().reset_index()\n", - "df" + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea1b275b-38a5-4ea8-956f-c95945ef690d", + "metadata": {}, + "outputs": [], + "source": [ + "ds = ds.set_coords(['WOD_id',\n", + " 'origflagset',\n", + " 'country',\n", + " 'dataset',\n", + " 'Access_no',\n", + " 'dbase_orig',\n", + " 'Project',\n", + " 'WOD_cruise_identifier',\n", + " 'Institute',\n", + " 'Ocean_Vehicle',\n", + " 'Temperature_Instrument',\n", + " 'CODA_id'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31a7877e-62bf-4a99-88bd-b34f864d0e01", + "metadata": {}, + "outputs": [], + "source": [ + "ds" ] }, { "cell_type": "markdown", - "id": "ba82816c-c8fe-44ea-8439-6741bd24b0fe", + "id": "879327f9-4242-45fa-bcf5-760b36a4483b", "metadata": {}, "source": [ - "# convert to dask dataframe" + "# make truncated version of ds" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "5fd92d63-a672-4625-9b8e-3a18dfcd423b", + "execution_count": null, + "id": "0e1b6158-80b6-4475-a2bb-4d2c2ebc4ec6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 3min 32s, sys: 30 s, total: 4min 2s\n", - "Wall time: 3min 53s\n" - ] - } - ], + "outputs": [], "source": [ - "%%time\n", - "ddf = dd.from_pandas(df, npartitions=500)" + "ds_truncated = ds#.isel(cast=slice(0,10000))" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "b19f879a-5bc9-4ccd-89bc-9edf2487156f", + "execution_count": null, + "id": "9be97945-cd32-4b18-8a05-09143517a5c5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
    Dask DataFrame Structure:
    \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    castz_indexTemperatureWOD_idtimelatlonorigflagsetcountrydatasetAccess_nodbase_origProjectWOD_cruise_identifierInstituteOcean_VehicleTemperature_InstrumentzCODA_id
    npartitions=500
    0int64int64float32int32datetime64[ns]float32float32stringstringstringfloat64stringstringstringstringstringstringfloat32string
    128841.........................................................
    ............................................................
    64291659.........................................................
    64420499.........................................................
    \n", - "
    Dask Name: frompandas, 1 expression
    " - ], - "text/plain": [ - "Dask DataFrame Structure:\n", - " cast z_index Temperature WOD_id time lat lon origflagset country dataset Access_no dbase_orig Project WOD_cruise_identifier Institute Ocean_Vehicle Temperature_Instrument z CODA_id\n", - "npartitions=500 \n", - "0 int64 int64 float32 int32 datetime64[ns] float32 float32 string string string float64 string string string string string string float32 string\n", - "128841 ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "64291659 ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "64420499 ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "Dask Name: frompandas, 1 expression\n", - "Expr=df" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "%%time\n", + "df = ds_truncated[['Temperature']].to_dataframe().reset_index()\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "ba82816c-c8fe-44ea-8439-6741bd24b0fe", + "metadata": {}, + "source": [ + "# convert to dask dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5fd92d63-a672-4625-9b8e-3a18dfcd423b", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "ddf = dd.from_pandas(df, npartitions=500)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b19f879a-5bc9-4ccd-89bc-9edf2487156f", + "metadata": {}, + "outputs": [], "source": [ "ddf" ] @@ -5790,20 +731,167 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "6355e01a-b0a9-47b8-8253-4a3a5364a551", "metadata": {}, + "outputs": [], + "source": [ + "ddf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cba5d048-4ffc-4d8a-a006-92de19d87f10", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "ddf.info(memory_usage='deep')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e8524fb-2200-41aa-a79f-fabd680f5196", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "ddf.to_parquet(write_path+\"2005_pfl_temp.pq\",engine='pyarrow', compression='snappy')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b2fe17f-0a0f-43bb-abd0-c92b925779f5", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "df.to_csv(write_path+\"2005_pfl_temp.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e62e8eb2-3203-4a55-be09-3f9b13fdff58", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "434a2c8b-45b2-421c-9b0f-a72ad734fd4c", + "metadata": {}, + "source": [ + "# load parquet data & filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "759f42a1-b38c-4f95-b74e-b89d48606521", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "temp_pfl_ddf = dd.read_parquet(write_path+\"2005_pfl_temp.pq\")\n", + "# Search for rows where 'column_name' matches a condition\n", + "result_parquet = temp_pfl_ddf[temp_pfl_ddf ['z'] >= 2000].compute()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2c3b6f4-5dfe-4620-8cda-d97dd5830060", + "metadata": {}, + "outputs": [], + "source": [ + "result_parquet" + ] + }, + { + "cell_type": "markdown", + "id": "6d14b366-3837-429c-a1fd-da2201a2377b", + "metadata": {}, + "source": [ + "# load CSV data and filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7533621-26ca-4c30-a5e9-31245eb26819", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "temp_pfl_csv_ddf = dd.read_csv(write_path+\"2005_pfl_temp.csv\", blocksize=25e6) # 25 MB chunks\n", + "# Search for rows where 'column_name' matches a condition\n", + "result_csv = temp_pfl_csv_ddf[temp_pfl_csv_ddf['z'] >= 2000].compute()" + ] + }, + { + "cell_type": "markdown", + "id": "e57d13c5-02f6-400d-bb5f-eaf75c2cd278", + "metadata": {}, + "source": [ + "# lazy load temperature" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b3eee60a-3925-4993-8086-faa4453a73e6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 50.3 ms, sys: 31 ms, total: 81.3 ms\n", + "Wall time: 76.3 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "temp_pq_ddf = dd.read_parquet(write_path+\"2005_pfl_temp.pq\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b005387c-4967-4b0f-a1e1-b3ee1dfd79c8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 75.2 ms, sys: 9.09 ms, total: 84.3 ms\n", + "Wall time: 75.4 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "temp_csv_ddf = dd.read_csv(write_path+\"2005_pfl_temp.csv\", blocksize=25e6) # 25 MB chunks" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0499cc16-c9e3-40c5-810a-6b4d1c366a93", + "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/client.py:3362: UserWarning: Sending large graph of size 40.94 MiB.\n", - "This may cause some slowdown.\n", - "Consider loading the data with Dask directly\n", - " or using futures or delayed objects to embed the data into the graph without repetition.\n", - "See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.\n", - " warnings.warn(\n" + "CPU times: user 8.09 s, sys: 836 ms, total: 8.92 s\n", + "Wall time: 40.9 s\n" ] }, { @@ -5847,307 +935,387 @@ "
    zCODA_id
    1407115127.490104055762005-01-01 03:17:59.999827968-13.80981.542b''b'UNITED STATES'b'profiling float'1959.0b'GTSP Program'b'U.S. ARGO PROJECT'b'US028807'b'UNIVERSITY OF WASHINGTON; SEATTLE'b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: Sea-Bird Electronics, MODEL UNKNOWN'3066.0000b'WODpfl200501010015'
    1643696714.567104056542005-01-01 14:31:51.96098252836.483-14.506b'ARGO profiling floats'b'SPAIN'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'ES001259'b''b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'2005.9418b'WODpfl200501010096'
    53666212552.949104057502005-01-02 08:00:57.238769664-26.886-26.415b'ARGO profiling floats'b'GREAT BRITAIN'b'profiling float'42682.0b'US GODAE server (Argo)'b'ARGO UK'b'GB011482'b''b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'2003.7826b'WODpfl200501020070'
    26382653713.753104062042005-01-05 10:30:00.00000000044.233-9.640b'ARGO profiling floats'b'SPAIN'b'profiling float'42682.0b'US GODAE server (Argo)'b''b'ES001250'b''b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'2004.0397b'WODpfl200501050062'
    1040939021.430104064862005-01-07 06:28:00.000113664-16.613-116.599b''b'UNITED STATES'b'profiling float'1970.0b'GTSP Program'b'U.S. ARGO PROJECT'b'US028886'b'UNIVERSITY OF WASHINGTON; SEATTLE'b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: Sea-Bird Electronics, MODEL UNKNOWN'3081.0000b'WODpfl200501070050'
    ............................................................
    01027.816999104055612005-01-01 00:42:11.160091648-13.64169.829002ARGO profiling floatsJAPANprofiling float4535463133552.821106246232005-12-27 08:12:11.160735744-29.524-37.417b'ARGO profiling floats'b'GREAT BRITAIN'b'profiling float'42682.0US GODAE server (Argo)J-ARGO (JAPAN ARGO)JP031068JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH...APEX (Autonomous Profiling Explorer, Webb Rese...CTD: TYPE UNKNOWN4.473917WODpfl200501010001b'US GODAE server (Argo)'b'ARGO UK'b'GB011473'b''b'APEX (Autonomous Profiling Explorer, Webb Re...b'CTD: TYPE UNKNOWN'2004.0653b'WODpfl200512270056'
    11127.745001104055612005-01-01 00:42:11.160091648-13.64169.829002ARGO profiling floatsJAPANprofiling float7607363241913.430106247702005-12-28 00:50:38.03987456048.310-14.918b'ARGO profiling floats'b'FRANCE'b'profiling float'42682.0US GODAE server (Argo)J-ARGO (JAPAN ARGO)JP031068JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH...APEX (Autonomous Profiling Explorer, Webb Rese...CTD: TYPE UNKNOWN9.444819WODpfl200501010001b'US GODAE server (Argo)'b'CONGAS (CONTINENTAL GASCOGNE)'b'FR014222'b''b'PROVOR (free-drifting hydrographic profiler,...b'CTD: TYPE UNKNOWN'2018.9020b'WODpfl200512280005'
    21227.408001104055612005-01-01 00:42:11.160091648-13.64169.829002ARGO profiling floatsJAPANprofiling float66438633091223.516106248362005-12-28 07:04:41.1607357443.241-13.970b'ARGO profiling floats'b'FRANCE'b'profiling float'42682.0US GODAE server (Argo)J-ARGO (JAPAN ARGO)JP031068JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH...APEX (Autonomous Profiling Explorer, Webb Rese...CTD: TYPE UNKNOWN13.918531WODpfl200501010001b'US GODAE server (Argo)'b''b'FR014496'b''b'PROVOR (free-drifting hydrographic profiler,...b'CTD: TYPE UNKNOWN'2004.1403b'WODpfl200512280073'
    31327.285999104055612005-01-01 00:42:11.160091648-13.64169.829002ARGO profiling floatsJAPANprofiling float12471639571223.460106255302005-12-31 07:21:32.761230336-5.114-9.137b'ARGO profiling floats'b'FRANCE'b'profiling float'42682.0US GODAE server (Argo)J-ARGO (JAPAN ARGO)JP031068JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH...APEX (Autonomous Profiling Explorer, Webb Rese...CTD: TYPE UNKNOWN18.889204WODpfl200501010001b'US GODAE server (Argo)'b''b'FR014490'b''b'PROVOR (free-drifting hydrographic profiler,...b'CTD: TYPE UNKNOWN'2010.0000b'WODpfl200512310064'
    41427.233000104055612005-01-01 00:42:11.160091648-13.64169.829002ARGO profiling floatsJAPANprofiling float13471639581173.527106255312005-12-31 07:01:51.960982528-0.2166.174b'ARGO profiling floats'b'FRANCE'b'profiling float'42682.0US GODAE server (Argo)J-ARGO (JAPAN ARGO)JP031068JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH...APEX (Autonomous Profiling Explorer, Webb Rese...CTD: TYPE UNKNOWN24.257399WODpfl200501010001b'US GODAE server (Argo)'b''b'FR014501'b''b'PROVOR (free-drifting hydrographic profiler,...b'CTD: TYPE UNKNOWN'2010.0841b'WODpfl200512310065'
    \n", + "

    232 rows × 19 columns

    \n", "
    " ], "text/plain": [ - " cast z_index Temperature WOD_id time lat \\\n", - "0 1 0 27.816999 10405561 2005-01-01 00:42:11.160091648 -13.641 \n", - "1 1 1 27.745001 10405561 2005-01-01 00:42:11.160091648 -13.641 \n", - "2 1 2 27.408001 10405561 2005-01-01 00:42:11.160091648 -13.641 \n", - "3 1 3 27.285999 10405561 2005-01-01 00:42:11.160091648 -13.641 \n", - "4 1 4 27.233000 10405561 2005-01-01 00:42:11.160091648 -13.641 \n", - "\n", - " lon origflagset country dataset Access_no \\\n", - "0 69.829002 ARGO profiling floats JAPAN profiling float 42682.0 \n", - "1 69.829002 ARGO profiling floats JAPAN profiling float 42682.0 \n", - "2 69.829002 ARGO profiling floats JAPAN profiling float 42682.0 \n", - "3 69.829002 ARGO profiling floats JAPAN profiling float 42682.0 \n", - "4 69.829002 ARGO profiling floats JAPAN profiling float 42682.0 \n", + " cast z_index Temperature WOD_id time \\\n", + "14071 15 1 27.490 10405576 2005-01-01 03:17:59.999827968 \n", + "16436 96 71 4.567 10405654 2005-01-01 14:31:51.960982528 \n", + "53666 212 55 2.949 10405750 2005-01-02 08:00:57.238769664 \n", + "26382 653 71 3.753 10406204 2005-01-05 10:30:00.000000000 \n", + "1040 939 0 21.430 10406486 2005-01-07 06:28:00.000113664 \n", + "... ... ... ... ... ... \n", + "45354 63133 55 2.821 10624623 2005-12-27 08:12:11.160735744 \n", + "76073 63241 91 3.430 10624770 2005-12-28 00:50:38.039874560 \n", + "66438 63309 122 3.516 10624836 2005-12-28 07:04:41.160735744 \n", + "12471 63957 122 3.460 10625530 2005-12-31 07:21:32.761230336 \n", + "13471 63958 117 3.527 10625531 2005-12-31 07:01:51.960982528 \n", + "\n", + " lat lon origflagset country \\\n", + "14071 -13.809 81.542 b'' b'UNITED STATES' \n", + "16436 36.483 -14.506 b'ARGO profiling floats' b'SPAIN' \n", + "53666 -26.886 -26.415 b'ARGO profiling floats' b'GREAT BRITAIN' \n", + "26382 44.233 -9.640 b'ARGO profiling floats' b'SPAIN' \n", + "1040 -16.613 -116.599 b'' b'UNITED STATES' \n", + "... ... ... ... ... \n", + "45354 -29.524 -37.417 b'ARGO profiling floats' b'GREAT BRITAIN' \n", + "76073 48.310 -14.918 b'ARGO profiling floats' b'FRANCE' \n", + "66438 3.241 -13.970 b'ARGO profiling floats' b'FRANCE' \n", + "12471 -5.114 -9.137 b'ARGO profiling floats' b'FRANCE' \n", + "13471 -0.216 6.174 b'ARGO profiling floats' b'FRANCE' \n", + "\n", + " dataset Access_no dbase_orig \\\n", + "14071 b'profiling float' 1959.0 b'GTSP Program' \n", + "16436 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "53666 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "26382 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "1040 b'profiling float' 1970.0 b'GTSP Program' \n", + "... ... ... ... \n", + "45354 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "76073 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "66438 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "12471 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", + "13471 b'profiling float' 42682.0 b'US GODAE server (Argo)' \n", "\n", - " dbase_orig Project WOD_cruise_identifier \\\n", - "0 US GODAE server (Argo) J-ARGO (JAPAN ARGO) JP031068 \n", - "1 US GODAE server (Argo) J-ARGO (JAPAN ARGO) JP031068 \n", - "2 US GODAE server (Argo) J-ARGO (JAPAN ARGO) JP031068 \n", - "3 US GODAE server (Argo) J-ARGO (JAPAN ARGO) JP031068 \n", - "4 US GODAE server (Argo) J-ARGO (JAPAN ARGO) JP031068 \n", + " Project WOD_cruise_identifier \\\n", + "14071 b'U.S. ARGO PROJECT' b'US028807' \n", + "16436 b'' b'ES001259' \n", + "53666 b'ARGO UK' b'GB011482' \n", + "26382 b'' b'ES001250' \n", + "1040 b'U.S. ARGO PROJECT' b'US028886' \n", + "... ... ... \n", + "45354 b'ARGO UK' b'GB011473' \n", + "76073 b'CONGAS (CONTINENTAL GASCOGNE)' b'FR014222' \n", + "66438 b'' b'FR014496' \n", + "12471 b'' b'FR014490' \n", + "13471 b'' b'FR014501' \n", "\n", - " Institute \\\n", - "0 JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH... \n", - "1 JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH... \n", - "2 JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH... \n", - "3 JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH... \n", - "4 JAPAN AGENCY FOR MARINE-EARTH SCIENCE AND TECH... \n", + " Institute \\\n", + "14071 b'UNIVERSITY OF WASHINGTON; SEATTLE' \n", + "16436 b'' \n", + "53666 b'' \n", + "26382 b'' \n", + "1040 b'UNIVERSITY OF WASHINGTON; SEATTLE' \n", + "... ... \n", + "45354 b'' \n", + "76073 b'' \n", + "66438 b'' \n", + "12471 b'' \n", + "13471 b'' \n", + "\n", + " Ocean_Vehicle \\\n", + "14071 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", + "16436 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", + "53666 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", + "26382 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", + "1040 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", + "... ... \n", + "45354 b'APEX (Autonomous Profiling Explorer, Webb Re... \n", + "76073 b'PROVOR (free-drifting hydrographic profiler,... \n", + "66438 b'PROVOR (free-drifting hydrographic profiler,... \n", + "12471 b'PROVOR (free-drifting hydrographic profiler,... \n", + "13471 b'PROVOR (free-drifting hydrographic profiler,... \n", + "\n", + " Temperature_Instrument z \\\n", + "14071 b'CTD: Sea-Bird Electronics, MODEL UNKNOWN' 3066.0000 \n", + "16436 b'CTD: TYPE UNKNOWN' 2005.9418 \n", + "53666 b'CTD: TYPE UNKNOWN' 2003.7826 \n", + "26382 b'CTD: TYPE UNKNOWN' 2004.0397 \n", + "1040 b'CTD: Sea-Bird Electronics, MODEL UNKNOWN' 3081.0000 \n", + "... ... ... \n", + "45354 b'CTD: TYPE UNKNOWN' 2004.0653 \n", + "76073 b'CTD: TYPE UNKNOWN' 2018.9020 \n", + "66438 b'CTD: TYPE UNKNOWN' 2004.1403 \n", + "12471 b'CTD: TYPE UNKNOWN' 2010.0000 \n", + "13471 b'CTD: TYPE UNKNOWN' 2010.0841 \n", "\n", - " Ocean_Vehicle Temperature_Instrument \\\n", - "0 APEX (Autonomous Profiling Explorer, Webb Rese... CTD: TYPE UNKNOWN \n", - "1 APEX (Autonomous Profiling Explorer, Webb Rese... CTD: TYPE UNKNOWN \n", - "2 APEX (Autonomous Profiling Explorer, Webb Rese... CTD: TYPE UNKNOWN \n", - "3 APEX (Autonomous Profiling Explorer, Webb Rese... CTD: TYPE UNKNOWN \n", - "4 APEX (Autonomous Profiling Explorer, Webb Rese... CTD: TYPE UNKNOWN \n", + " CODA_id \n", + "14071 b'WODpfl200501010015' \n", + "16436 b'WODpfl200501010096' \n", + "53666 b'WODpfl200501020070' \n", + "26382 b'WODpfl200501050062' \n", + "1040 b'WODpfl200501070050' \n", + "... ... \n", + "45354 b'WODpfl200512270056' \n", + "76073 b'WODpfl200512280005' \n", + "66438 b'WODpfl200512280073' \n", + "12471 b'WODpfl200512310064' \n", + "13471 b'WODpfl200512310065' \n", "\n", - " z CODA_id \n", - "0 4.473917 WODpfl200501010001 \n", - "1 9.444819 WODpfl200501010001 \n", - "2 13.918531 WODpfl200501010001 \n", - "3 18.889204 WODpfl200501010001 \n", - "4 24.257399 WODpfl200501010001 " + "[232 rows x 19 columns]" ] }, - "execution_count": 42, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "ddf.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "cba5d048-4ffc-4d8a-a006-92de19d87f10", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/client.py:3362: UserWarning: Sending large graph of size 20.10 GiB.\n", - "This may cause some slowdown.\n", - "Consider loading the data with Dask directly\n", - " or using futures or delayed objects to embed the data into the graph without repetition.\n", - "See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Columns: 19 entries, cast to CODA_id\n", - "dtypes: datetime64[ns](1), float32(4), float64(1), int32(1), int64(2), string(10)\n", - "memory usage: 20.1 GB\n", - "CPU times: user 2min 50s, sys: 1min 27s, total: 4min 17s\n", - "Wall time: 4min 22s\n" - ] - } - ], - "source": [ - "%%time\n", - "ddf.info(memory_usage='deep')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "0e8524fb-2200-41aa-a79f-fabd680f5196", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/client.py:3362: UserWarning: Sending large graph of size 20.10 GiB.\n", - "This may cause some slowdown.\n", - "Consider loading the data with Dask directly\n", - " or using futures or delayed objects to embed the data into the graph without repetition.\n", - "See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 6min 3s, sys: 2min 40s, total: 8min 44s\n", - "Wall time: 8min 37s\n" - ] - } - ], - "source": [ - "%%time\n", - "ddf.to_parquet(write_path+\"2005_pfl_temp.pq\",engine='pyarrow', compression='snappy')" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "3b2fe17f-0a0f-43bb-abd0-c92b925779f5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 13min 13s, sys: 3min 4s, total: 16min 18s\n", - "Wall time: 13min 19s\n" - ] - } - ], "source": [ "%%time\n", - "df.to_csv(write_path+\"2005_pfl_temp.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e62e8eb2-3203-4a55-be09-3f9b13fdff58", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "434a2c8b-45b2-421c-9b0f-a72ad734fd4c", - "metadata": {}, - "source": [ - "# load parquet data & filter" + "temp_csv_ddf[temp_csv_ddf['z'] >= 2000].compute()" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "759f42a1-b38c-4f95-b74e-b89d48606521", + "execution_count": 10, + "id": "78a44dcb-b857-47b5-8239-fa77ba5d2a9b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.68 s, sys: 1.03 s, total: 2.7 s\n", - "Wall time: 3.74 s\n" + "CPU times: user 1.72 s, sys: 107 ms, total: 1.83 s\n", + "Wall time: 3.35 s\n" ] - } - ], - "source": [ - "%%time\n", - "temp_pfl_ddf = dd.read_parquet(write_path+\"2005_pfl_temp.pq\")\n", - "# Search for rows where 'column_name' matches a condition\n", - "result_parquet = temp_pfl_ddf[temp_pfl_ddf ['z'] >= 2000].compute()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "d2c3b6f4-5dfe-4620-8cda-d97dd5830060", - "metadata": {}, - "outputs": [ + }, { "data": { "text/html": [ @@ -6546,551 +1714,68 @@ "[232 rows x 19 columns]" ] }, - "execution_count": 17, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "result_parquet" + "%%time\n", + "temp_pq_ddf[temp_pq_ddf['z'] >= 2000].compute()" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "66d72102-6fe8-487a-883a-5236789a60f6", + "execution_count": null, + "id": "624492e9-d9e3-4704-b2ac-95e3cf17b0e3", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Exception ignored in sys.unraisablehook: \n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/ipykernel/iostream.py\", line 664, in write\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/ipykernel/iostream.py\", line 509, in parent_header\n", - "Process Dask Worker process (from Nanny):\n", - "Process Dask Worker process (from Nanny):\n", - "Process Dask Worker process (from Nanny):\n", - "Process Dask Worker process (from Nanny):\n", - "Process Dask Worker process (from Nanny):\n", - "Process Dask Worker process (from Nanny):\n", - "Process Dask Worker process (from Nanny):\n", - " return self._parent_header.get()\n", - "KeyboardInterrupt: \n", - "Traceback (most recent call last):\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - "Traceback (most recent call last):\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - "Traceback (most recent call last):\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - "KeyboardInterrupt\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "KeyboardInterrupt\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - "Traceback (most recent call last):\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - "Traceback (most recent call last):\n", - "KeyboardInterrupt\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - "Traceback (most recent call last):\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - "KeyboardInterrupt\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 236, in asyncio_run\n", - " return loop.run_until_complete(main)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - "KeyboardInterrupt\n", - "KeyboardInterrupt\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 1015, in _run\n", - " asyncio_run(run(), loop_factory=get_loop_factory())\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 239, in asyncio_run\n", - " _cancel_all_tasks(loop)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/compatibility.py\", line 255, in _cancel_all_tasks\n", - " loop.run_until_complete(asyncio.gather(*to_cancel, return_exceptions=True))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n", - " self.run_forever()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n", - " self._run_once()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/asyncio/base_events.py\", line 1871, in _run_once\n", - " event_list = self._selector.select(timeout)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/selectors.py\", line 469, in select\n", - " fd_event_list = self._selector.poll(timeout, max_ev)\n", - "KeyboardInterrupt\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1100, in join\n", - " self._wait_for_tstate_lock(timeout=max(timeout, 0))\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/threading.py\", line 1116, in _wait_for_tstate_lock\n", - " if lock.acquire(block, timeout):\n", - "KeyboardInterrupt\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 314, in _bootstrap\n", - " self.run()\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/process.py\", line 202, in _run\n", - " target(*args, **kwargs)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 989, in _run\n", - " with contextlib.ExitStack() as stack:\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 576, in __exit__\n", - " raise exc_details[1]\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 561, in __exit__\n", - " if cb(*exc_details):\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/contextlib.py\", line 449, in _exit_wrapper\n", - " callback(*args, **kwds)\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/site-packages/distributed/nanny.py\", line 994, in close_stop_q\n", - " child_stop_q.put({\"op\": \"stop\"}) # usually redundant\n", - " File \"/g/data/es60/users/thomas_moore/miniconda3/envs/tabular_oceans/lib/python3.10/multiprocessing/queues.py\", line 89, in put\n", - " if not self._sem.acquire(block, timeout):\n", - "KeyboardInterrupt\n", - "\n", - "KeyboardInterrupt\n", - "\n" - ] - } - ], - "source": [ - "df = pd.read_csv(write_path+\"2005_pfl_temp.csv\")" - ] + "outputs": [], + "source": [] }, { - "cell_type": "markdown", - "id": "6d14b366-3837-429c-a1fd-da2201a2377b", + "cell_type": "code", + "execution_count": null, + "id": "f73438c1-690a-4487-a5ea-3371bd92b31e", "metadata": {}, - "source": [ - "# load CSV data and filter" - ] + "outputs": [], + "source": [] }, { "cell_type": "code", - "execution_count": 20, - "id": "c7533621-26ca-4c30-a5e9-31245eb26819", + "execution_count": null, + "id": "83b3d702-dd37-4308-a74e-7a38762b4e0d", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 8.5 s, sys: 485 ms, total: 8.99 s\n", - "Wall time: 42.2 s\n" - ] - } - ], - "source": [ - "%%time\n", - "temp_pfl_ddf = dd.read_csv(write_path+\"2005_pfl_temp.csv\", blocksize=25e6) # 25 MB chunks\n", - "# Search for rows where 'column_name' matches a condition\n", - "result_df = temp_pfl_ddf[temp_pfl_ddf['z'] >= 2000].compute()" - ] + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "889d476a-3934-45ed-a64b-50d21d17c840", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da7c52f0-5f7a-4246-8ef3-8bb04a8dc9a2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffb2bf30-d858-48b8-a866-bb9a0662607f", + "metadata": {}, + "outputs": [], + "source": [] }, { "cell_type": "code", "execution_count": null, - "id": "e70e12c2-b877-49a3-931b-6e5da39bc9a9", + "id": "394592f8-f113-42e7-a5ea-28a189d819d3", "metadata": {}, "outputs": [], "source": []