seagliderOG1 demo

The purpose of this notebook is to demonstrate how seagliderOG1 converts Seaglider basestation files to OG1 format.

The test case is to convert sg015 data from the Labrador Sea in September 2004.

The demo is organised to show

  • Conversion of a single dive cycle (single p*.nc file)

  • Conversion for a folder of local dive-cycle files (full mission of p*.nc files)

  • Download from remote server + conversion (directory with full mission of p*.nc files)

Options are provided to load only a subset of files (e.g. 10), but note that the OG1 format expects a full mission.

[1]:
import pathlib
import sys

script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]
sys.path.append(str(parent_dir))
sys.path.append(str(parent_dir) + '/seagliderOG1')
print(parent_dir)
print(sys.path)

import xarray as xr
import os
import pooch
from seagliderOG1 import readers, writers, plotters, tools
from seagliderOG1 import convertOG1, vocabularies

/home/runner/work/seagliderOG1/seagliderOG1
['/home/runner/micromamba/envs/TEST/lib/python312.zip', '/home/runner/micromamba/envs/TEST/lib/python3.12', '/home/runner/micromamba/envs/TEST/lib/python3.12/lib-dynload', '', '/home/runner/micromamba/envs/TEST/lib/python3.12/site-packages', '/home/runner/work/seagliderOG1/seagliderOG1', '/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1']
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:472: SyntaxWarning: invalid escape sequence '\d'
  fill_val = 2 ** (int(re.findall("\d+", str(new_dtype))[0]) - 1) - 1
[2]:
# Specify the path for writing datafiles
data_path = os.path.join(parent_dir, 'data')

Reading basestation files

There are three ways to load a glider dataset.

Load an example dataset using seagliderOG1.readers.load_sample_dataset

Alternatively, use your own file with, e.g., ds = xr.open_dataset('/path/to/yourfile.nc')

Load single sample dataset

[3]:
ds = readers.load_sample_dataset()
ds
Downloading file 'p0040034_20031007.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc' to '/home/runner/.cache/seagliderOG1'.
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
Cell In[3], line 1
----> 1 ds = readers.load_sample_dataset()
      2 ds

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:37, in load_sample_dataset(dataset_name)
     35 def load_sample_dataset(dataset_name="p0040034_20031007.nc"):
     36     if dataset_name in data_source_og.registry.keys():
---> 37         file_path = data_source_og.fetch(dataset_name)
     38         return xr.open_dataset(file_path)
     39     else:

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
    586     if downloader is None:
    587         downloader = choose_downloader(url, progressbar=progressbar)
--> 589     stream_download(
    590         url,
    591         full_path,
    592         known_hash,
    593         downloader,
    594         pooch=self,
    595         retry_if_failed=self.retry_if_failed,
    596     )
    598 if processor is not None:
    599     return processor(str(full_path), action, self)

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
    803 try:
    804     # Stream the file to a temporary so that we can safely check its
    805     # hash before overwriting the original.
    806     with temporary_file(path=str(fname.parent)) as tmp:
--> 807         downloader(url, tmp, pooch)
    808         hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
    809         shutil.move(tmp, str(fname))

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/downloaders.py:221, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
    219 try:
    220     response = requests.get(url, timeout=timeout, **kwargs)
--> 221     response.raise_for_status()
    222     content = response.iter_content(chunk_size=self.chunk_size)
    223     total = int(response.headers.get("content-length", 0))

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
   1019     http_error_msg = (
   1020         f"{self.status_code} Server Error: {reason} for url: {self.url}"
   1021     )
   1023 if http_error_msg:
-> 1024     raise HTTPError(http_error_msg, response=self)

HTTPError: 404 Client Error: Not Found for url: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc

Load datasets from a local directory

[4]:
# Specify the input directory on your local machine
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"

# Load and concatenate all datasets in the input directory
# Optionally, specify the range of profiles to load (start_profile, end_profile)
list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)

# Where list_datasets is a list of xarray datasets.  A single dataset can be accessed as
ds = list_datasets[0]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[4], line 6
      2 input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
      4 # Load and concatenate all datasets in the input directory
      5 # Optionally, specify the range of profiles to load (start_profile, end_profile)
----> 6 list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
      8 # Where list_datasets is a list of xarray datasets.  A single dataset can be accessed as
      9 ds = list_datasets[0]

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:101, in read_basestation(source, start_profile, end_profile)
     99     file_list = os.listdir(source)
    100 else:
--> 101     raise ValueError("Source must be a valid URL or directory path.")
    103 filtered_files = filter_files_by_profile(file_list, start_profile, end_profile)
    105 datasets = []

ValueError: Source must be a valid URL or directory path.
[5]:
ds
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 ds

NameError: name 'ds' is not defined

Load datasets from a remote directory (URL)

[6]:
# Specify the server where data are located
server = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/"

# Load and concatenate all datasets from the server, optionally specifying the range of profiles to load
list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
Downloading file 'p0150500_20050213.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150500_20050213.nc' to '/home/runner/.cache/seagliderOG1'.
Downloading file 'p0150501_20050213.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150501_20050213.nc' to '/home/runner/.cache/seagliderOG1'.
Downloading file 'p0150502_20050214.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150502_20050214.nc' to '/home/runner/.cache/seagliderOG1'.
Downloading file 'p0150503_20050214.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150503_20050214.nc' to '/home/runner/.cache/seagliderOG1'.

Convert to OG1 format

Process:

  1. For one basestation dataset, split the dataset by dimension (split_ds)

  2. Transform the dataset with dimension sg_data_point into OG1 format

    • Change the dimension to N_MEASUREMENTS

    • Rename variables according to vocabularies.standard_names

    • Assign variable attributes according to vocabularies.vocab_attrs. (Note: This could go wrong since it makes assumptions about the input variables. May need additional handling.)

  3. Add missing mandatory variables:

    • From split_ds[(gps_info,)], add the LATITUDE_GPS, LONGITUDE_GPS and TIME_GPS (Note: presently TIME_GPS is stripped before saving, but TIME values contain TIME_GPS)

    • Create PROFILE_NUMBER and PHASE

    • Calculate DEPTH_Z, which is positive up (a minimal code sketch of steps 2 and 3 follows this list)

  4. Update attributes for the file.

    • Combines creator and contributor from original attributes into contributor

    • Adds contributing_institutions based on institution

    • Reformats the time values in time_coverage_* and renames start_time -> start_date

    • Adds date_modified

    • Renames comments -> history and site -> summary

    • Adds title, platform, platform_vocabulary, featureType, Conventions, rtqc_method* according to OceanGliders format

    • Retains naming_authority, institution, project, geospatial_* as OG attributes

    • Retains extra attributes: license, keywords, keywords_vocabulary, file_version, acknowledgement, date_created, disclaimer
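
To make steps 2 and 3 concrete, here is a minimal sketch. It is not the convertOG1 implementation: the function name sketch_to_og1 and the rename_map entries are illustrative assumptions, and it assumes the basestation depth variable is in metres, positive down. convertOG1.convert_to_OG1 handles much more (QC variables, the GPS variables, attributes).

import xarray as xr

def sketch_to_og1(ds: xr.Dataset) -> xr.Dataset:
    # Step 2 (partial): rename the dimension and a few example variables
    ds = ds.rename_dims({"sg_data_point": "N_MEASUREMENTS"})
    rename_map = {"ctd_time": "TIME", "longitude": "LONGITUDE", "latitude": "LATITUDE"}
    ds = ds.rename({k: v for k, v in rename_map.items() if k in ds.variables})

    # Step 3 (partial): DEPTH_Z is positive up, so flip the sign of the positive-down depth
    if "depth" in ds.variables:
        ds["DEPTH_Z"] = -ds["depth"]
        ds["DEPTH_Z"].attrs.update({"units": "m", "positive": "up"})
    return ds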

Future behaviour to be added:

  1. Retain the variables starting with sg_cal and check whether they vary over the mission (they shouldn't); a possible check is sketched after this list

  2. Add sensors, using information in the split_ds with no dimensions

    • Needed (from sg_cal_constants): sg_cal plus volmax, vbd_cnts_per_cc, therm_expan, t_*, mass, hd_*, ctcor, cpcor, c_*, abs_compress, a, Tcor, Soc, Pcor, Foffset

    • Possibly also retain: reviewed, magnetic_variation (which will change with position), log_D_FLARE, flight_avg_speed_north and flight_avg_speed_east (and their _gsm variants), depth_avg_curr_north and depth_avg_curr_east (and their _gsm variants), wlbb2f (identifies the sensor), sg_cal_mission_title, sg_cal_id_str, calibcomm_oxygen, calibcomm, sbe41 (meaning unclear), hdm_qc, glider
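
As a possible check for item 1 (an illustrative sketch, not part of seagliderOG1; the helper name check_sg_cal_constant is an assumption), the scalar sg_cal variables of each dive can be compared against the first dive:

import numpy as np

def check_sg_cal_constant(list_datasets):
    # Illustrative: flag any sg_cal* variable whose value differs between dives
    # (note: NaN values would also be flagged as varying)
    first = list_datasets[0]
    varying = []
    for name in [v for v in first.variables if str(v).startswith("sg_cal")]:
        ref = first[name].values
        for ds in list_datasets[1:]:
            if name in ds and not np.array_equal(ds[name].values, ref):
                varying.append(name)
                break
    return varying  # an empty list means the sg_cal values are constant over the mission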

Convert a single (sample) dataset

[7]:
# Loads one sample dataset (default: p0040034_20031007.nc)
ds = readers.load_sample_dataset()

ds_OG1 = convertOG1.convert_to_OG1(ds)

# Check the results - uncomment the following lines to either generate a plot or show the variables.
plotters.plot_profile_depth(ds_OG1)
Downloading file 'p0040034_20031007.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc' to '/home/runner/.cache/seagliderOG1'.
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
Cell In[7], line 2
      1 # Loads one dataset (p0150500_20050213.nc)
----> 2 ds = readers.load_sample_dataset()
      4 ds_OG1 = convertOG1.convert_to_OG1(ds)
      6 # Check the results - uncomment the following lines to either generate a plot or show the variables.

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:37, in load_sample_dataset(dataset_name)
     35 def load_sample_dataset(dataset_name="p0040034_20031007.nc"):
     36     if dataset_name in data_source_og.registry.keys():
---> 37         file_path = data_source_og.fetch(dataset_name)
     38         return xr.open_dataset(file_path)
     39     else:

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
    586     if downloader is None:
    587         downloader = choose_downloader(url, progressbar=progressbar)
--> 589     stream_download(
    590         url,
    591         full_path,
    592         known_hash,
    593         downloader,
    594         pooch=self,
    595         retry_if_failed=self.retry_if_failed,
    596     )
    598 if processor is not None:
    599     return processor(str(full_path), action, self)

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
    803 try:
    804     # Stream the file to a temporary so that we can safely check its
    805     # hash before overwriting the original.
    806     with temporary_file(path=str(fname.parent)) as tmp:
--> 807         downloader(url, tmp, pooch)
    808         hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
    809         shutil.move(tmp, str(fname))

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/downloaders.py:221, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
    219 try:
    220     response = requests.get(url, timeout=timeout, **kwargs)
--> 221     response.raise_for_status()
    222     content = response.iter_content(chunk_size=self.chunk_size)
    223     total = int(response.headers.get("content-length", 0))

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
   1019     http_error_msg = (
   1020         f"{self.status_code} Server Error: {reason} for url: {self.url}"
   1021     )
   1023 if http_error_msg:
-> 1024     raise HTTPError(http_error_msg, response=self)

HTTPError: 404 Client Error: Not Found for url: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc
[8]:
# Print to screen a table of the variables and variable attributes
#plotters.show_attributes(ds_OG1)
ds_OG1
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 3
      1 # Print to screen a table of the variables and variable attributes
      2 #plotters.show_attributes(ds_OG1)
----> 3 ds_OG1

NameError: name 'ds_OG1' is not defined

Convert a mission from a local directory of basestation files

  • For local data in the directory input_dir

  • Creates a plot of ctd_depth against ctd_time.

[9]:
# Specify the input directory on your local machine
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"

# Load and concatenate all datasets in the input directory
# Optionally, specify the range of profiles to load (start_profile, end_profile)
list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)

# Convert the list of datasets to OG1
ds_OG1 = convertOG1.convert_to_OG1(list_datasets)

# Generate a simple plot
plotters.plot_profile_depth(ds_OG1)
#plotters.show_contents(ds_all,'attrs')
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[9], line 6
      2 input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
      4 # Load and concatenate all datasets in the input directory
      5 # Optionally, specify the range of profiles to load (start_profile, end_profile)
----> 6 list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
      8 # Convert the list of datasets to OG1
      9 ds_OG1 = convertOG1.convert_to_OG1(list_datasets)

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:101, in read_basestation(source, start_profile, end_profile)
     99     file_list = os.listdir(source)
    100 else:
--> 101     raise ValueError("Source must be a valid URL or directory path.")
    103 filtered_files = filter_files_by_profile(file_list, start_profile, end_profile)
    105 datasets = []

ValueError: Source must be a valid URL or directory path.

Convert a mission from the NCEI server (with p*.nc files)

[10]:
# Specify the server where data are located
server = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/"

# Load and concatenate all datasets from the server, optionally specifying the range of profiles to load
list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)

# Convert the list of datasets to OG1
ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
Variable 'vert_speed_gsm' not in OG1 vocabulary.
Variable 'time' not in OG1 vocabulary.
Variable 'speed_gsm' not in OG1 vocabulary.
Variable 'sound_velocity' not in OG1 vocabulary.
Variable 'north_displacement_gsm' not in OG1 vocabulary.
Variable 'longitude_gsm' not in OG1 vocabulary.
Variable 'latitude_gsm' not in OG1 vocabulary.
Variable 'horz_speed_gsm' not in OG1 vocabulary.
Variable 'glide_angle_gsm' not in OG1 vocabulary.
Variable 'eng_wlbb2f_VFtemp' not in OG1 vocabulary.
Variable 'eng_sbect_tempFreq' not in OG1 vocabulary.
Variable 'eng_sbect_condFreq' not in OG1 vocabulary.
Variable 'eng_elaps_t_0000' not in OG1 vocabulary.
Variable 'eng_elaps_t' not in OG1 vocabulary.
Variable 'eng_depth' not in OG1 vocabulary.
Variable 'east_displacement_gsm' not in OG1 vocabulary.
Variable 'depth' not in OG1 vocabulary.
Variable 'density' not in OG1 vocabulary.
Variable 'buoyancy' not in OG1 vocabulary.
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:359: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
  return ds.assign(divenum=('N_MEASUREMENTS', [dive_number] * ds.dims['N_MEASUREMENTS']))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:178: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
  ds['dive_num_cast'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:239: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
  ds['PHASE'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:241: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
  ds['PHASE_QC'] = (['N_MEASUREMENTS'], np.zeros(ds.dims['N_MEASUREMENTS'], dtype=int))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:292: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
  ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
No conversion information found for cm s-1 to degrees
No conversion information found for micromoles/kg to percent
wlbb2f
Adding sensor: SENSOR_FLUOROMETERS
sbe41
sbe41
SBE t12/c12 calibration 30DEC03
30DEC03
Unknown

Saving data

Writing xarray datasets to netCDF fails when attributes are not one of the allowed types (str, Number, np.ndarray, np.number, list, tuple, bytes), so the function writers.save_dataset was written to handle this.
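
The idea, as a minimal sketch (writers.save_dataset itself handles more cases, and the helper name sanitize_attrs here is illustrative): before calling to_netcdf, cast any global or variable attribute of a disallowed type to a string.

from numbers import Number

import numpy as np
import xarray as xr

def sanitize_attrs(ds: xr.Dataset) -> xr.Dataset:
    # Cast any global or variable attribute that netCDF cannot serialize
    # (e.g. a dict of calibration parameters) to a string, in place
    allowed = (str, Number, np.ndarray, np.number, list, tuple, bytes)
    for obj in [ds, *ds.variables.values()]:
        for key, value in obj.attrs.items():
            if not isinstance(value, allowed):
                obj.attrs[key] = str(value)
    return ds

# e.g. sanitize_attrs(ds_OG1).to_netcdf(output_file)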

[11]:
# Write the file
# This writer catches errors in data types (DType errors) when using xr.to_netcdf()
# The solution is to convert them to strings, which may be undesired behaviour
output_file = os.path.join(data_path, 'demo_test.nc')
if os.path.exists(output_file):
    os.remove(output_file)

writers.save_dataset(ds_OG1, output_file);
TypeError Invalid value for attr 'calibration_parameters': {'t_g': 0.00431040083, 't_h': 0.000631750336, 't_i': 2.27137914e-05, 't_j': 2.20168797e-06, 'c_g': -10.2690994, 'c_h': 1.1547191, 'c_i': -0.00143967206, 'c_j': 0.000204972837, 'cpcor': -9.57e-08, 'ctcor': 3.25e-06}. For serialization to netCDF files, its value must be of one of the following types: str, Number, ndarray, number, list, tuple, bytes
variable 'SENSOR_CTD_UNKNOWN': Converting attribute 'calibration_parameters' with value '{'t_g': 0.00431040083, 't_h': 0.000631750336, 't_i': 2.27137914e-05, 't_j': 2.20168797e-06, 'c_g': -10.2690994, 'c_h': 1.1547191, 'c_i': -0.00143967206, 'c_j': 0.000204972837, 'cpcor': -9.57e-08, 'ctcor': 3.25e-06}' to string.
[12]:
# Load the data saved
ds1 = xr.open_dataset(output_file)

# Generate a simple plot
#plotters.show_contents(ds_all,'attrs')
plotters.plot_depth_colored(ds1, color_by='PROFILE_NUMBER')

[Figure: dive depth coloured by PROFILE_NUMBER (demo-output_21_0.png)]

Run multiple missions

[13]:
# Add these to existing attributes - update to your details
contrib_to_append = vocabularies.contrib_to_append
print(contrib_to_append)
{'contributor_name': 'Eleanor Frajka-Williams', 'contributor_email': 'eleanorfrajka@gmail.com', 'contributor_role': 'Data scientist', 'contributor_role_vocabulary': 'http://vocab.nerc.ac.uk/search_nvs/W08', 'contributing_institutions': 'University of Hamburg - Institute of Oceanography', 'contributing_institutions_vocabulary': 'https://edmo.seadatanet.org/report/1156', 'contributing_institutions_role': 'Data scientist', 'contributing_institutions_role_vocabulary': 'http://vocab.nerc.ac.uk/search_nvs/W08'}
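
To substitute your own details, one option (a sketch; the keys follow the dictionary printed above and the values below are placeholders) is to copy and update the dictionary before running the conversion:

contrib_to_append = dict(vocabularies.contrib_to_append)
contrib_to_append.update({
    "contributor_name": "Your Name",
    "contributor_email": "your.name@example.org",
    "contributing_institutions": "Your Institution",
})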
[14]:
# Specify a list of servers or local directories
input_locations = [
    # Either Iceland, Faroes or RAPID/MOCHA
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20090829/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20080606/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20081106/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/012/20070831/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080214/",  # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080222/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20061112/",  # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20090605/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20071113/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20080607/",  # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100518/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100903/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20081108/",     # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20061112/",    # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20070609/",   # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/102/20061112/",  # done
    # Labrador Sea
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20040924/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/008/20031002/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/004/20031002/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/",
    # RAPID/MOCHA
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100729/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/034/20110128/",
]

for input_loc in input_locations:
    # Example usage
    ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True,  run_quietly=True)
Downloading file 'p0140001_20040924.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0140001_20040924.nc' to '/home/runner/.cache/seagliderOG1'.
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
Cell In[14], line 33
      2 input_locations = [
      3     # Either Iceland, Faroes or RAPID/MOCHA
      4     #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20090829/", # done
   (...)
     28     #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/034/20110128/",
     29 ]
     31 for input_loc in input_locations:
     32     # Example usage
---> 33     ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True,  run_quietly=True)

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/convertOG1.py:608, in process_and_save_data(input_location, save, output_dir, run_quietly)
    592 """
    593 Processes and saves data from the specified input location.
    594 This function loads and concatenates datasets from the server, converts them to OG1 format,
   (...)
    604 xarray.Dataset: The processed dataset.
    605 """
    607 # Load and concatenate all datasets from the server
--> 608 list_datasets = readers.read_basestation(input_location)
    610 # Convert the list of datasets to OG1
    611 ds1 = convert_to_OG1(list_datasets[-1])

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:109, in read_basestation(source, start_profile, end_profile)
    107 for file in filtered_files:
    108     if source.startswith("http://") or source.startswith("https://"):
--> 109         ds = load_sample_dataset(file)
    110     else:
    111         ds = xr.open_dataset(os.path.join(source, file))

File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:37, in load_sample_dataset(dataset_name)
     35 def load_sample_dataset(dataset_name="p0040034_20031007.nc"):
     36     if dataset_name in data_source_og.registry.keys():
---> 37         file_path = data_source_og.fetch(dataset_name)
     38         return xr.open_dataset(file_path)
     39     else:

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
    586     if downloader is None:
    587         downloader = choose_downloader(url, progressbar=progressbar)
--> 589     stream_download(
    590         url,
    591         full_path,
    592         known_hash,
    593         downloader,
    594         pooch=self,
    595         retry_if_failed=self.retry_if_failed,
    596     )
    598 if processor is not None:
    599     return processor(str(full_path), action, self)

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
    803 try:
    804     # Stream the file to a temporary so that we can safely check its
    805     # hash before overwriting the original.
    806     with temporary_file(path=str(fname.parent)) as tmp:
--> 807         downloader(url, tmp, pooch)
    808         hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
    809         shutil.move(tmp, str(fname))

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/downloaders.py:221, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
    219 try:
    220     response = requests.get(url, timeout=timeout, **kwargs)
--> 221     response.raise_for_status()
    222     content = response.iter_content(chunk_size=self.chunk_size)
    223     total = int(response.headers.get("content-length", 0))

File ~/micromamba/envs/TEST/lib/python3.12/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
   1019     http_error_msg = (
   1020         f"{self.status_code} Server Error: {reason} for url: {self.url}"
   1021     )
   1023 if http_error_msg:
-> 1024     raise HTTPError(http_error_msg, response=self)

HTTPError: 404 Client Error: Not Found for url: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0140001_20040924.nc