seagliderOG1 demo
The purpose of this notebook is to demonstrate the functionality of seagliderOG1
to convert from Seaglider basestation files to OG1 format.
OG1 format is a newly agreed format (since June 2024) for glider data sets from various platforms (e.g., Seaglider, Slocum, Seaexplorer). The format specification is maintained on GitHub.
OG1 manual:
The test case is to convert sg015 data from the Labrador Sea in September 2004.
The demo is organised to show:
- Conversion of a single dive cycle (single basestation file)
- Conversion for a folder of local dive-cycle files (full mission of basestation files)
- Download from remote server + conversion (directory with full mission of basestation files)
Options are provided to only load e.g. 10 files, but note that OG1 format expects a full mission.
import pathlib
import sys
script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]
sys.path.append(str(parent_dir) + '/seagliderOG1')
import xarray as xr
import os
import pooch
from seagliderOG1 import readers, writers, plotters, tools
from seagliderOG1 import convertOG1, vocabularies
['/home/runner/micromamba/envs/TEST/lib/', '/home/runner/micromamba/envs/TEST/lib/python3.12', '/home/runner/micromamba/envs/TEST/lib/python3.12/lib-dynload', '', '/home/runner/micromamba/envs/TEST/lib/python3.12/site-packages', '/home/runner/work/seagliderOG1/seagliderOG1', '/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1']
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ SyntaxWarning: invalid escape sequence '\d'
fill_val = 2 ** (int(re.findall("\d+", str(new_dtype))[0]) - 1) - 1
[ ]:
# Specify the path for writing datafiles
data_path = os.path.join(parent_dir, 'data')
Reading basestation files
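There are three ways to load a glider dataset, each shown in a subsection below: a single sample dataset, a local directory of files, or a remote directory (URL).
Load an example dataset using seagliderOG1.readers.load_sample_dataset
Alternatively, use your own with e.g. ds = xr.open_dataset('/path/to/file.nc')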
Load single sample dataset
ds = readers.load_sample_dataset()
Downloading file '' from '' to '/home/runner/.cache/seagliderOG1'.
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
<xarray.Dataset> Size: 290kB Dimensions: (sg_data_point: 586, gc_event: 36, gc_state: 10, gps_info: 3, trajectory: 1) Coordinates: ctd_time (sg_data_point) datetime64[ns] 5kB ... longitude (sg_data_point) float64 5kB ... latitude (sg_data_point) float64 5kB ... ctd_depth (sg_data_point) float64 5kB ... * trajectory (trajectory) int32 4B 15 Dimensions without coordinates: sg_data_point, gc_event, gc_state, gps_info Data variables: (12/397) salinity_qc (sg_data_point) |S1 586B ... eng_depth (sg_data_point) float64 5kB ... eng_aa4330_Temp (sg_data_point) float64 5kB ... eng_vbdCC (sg_data_point) float64 5kB ... north_displacement (sg_data_point) float64 5kB ... east_displacement (sg_data_point) float64 5kB ... ... ... sg_cal_optode_FoilCoefB0 float64 8B ... log_MISSION int32 4B ... log_FG_AHR_24V float64 8B ... log_ICE_FREEZE_MARGIN float64 8B ... surface_curr_north float64 8B ... log_RAFOS_DEVICE float64 8B ... Attributes: (12/59) quality_control_version: 1.12 base_station_micro_version: 4935 time_coverage_resolution: PT1S geospatial_vertical_max: 511.8935394557861 sea_name: Caribbean Sea mission: 1 ... ... source: Deepglider SG033 geospatial_vertical_min: 0.2601223952664329 creator_url: geospatial_vertical_units: meter dive_number: 15 disclaimer: Data has not been reviewed and is provid...
Load datasets from a local directory
# Specify the input directory on your local machine
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
# Load and concatenate all datasets in the input directory
# Optionally, specify the range of profiles to load (start_profile, end_profile)
list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
# Where list_datasets is a list of xarray datasets. A single dataset can be accessed as
ds = list_datasets[0]
AttributeError Traceback (most recent call last)
Cell In[4], line 6
2 input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
4 # Load and concatenate all datasets in the input directory
5 # Optionally, specify the range of profiles to load (start_profile, end_profile)
----> 6 list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
8 # Where list_datasets is a list of xarray datasets. A single dataset can be accessed as
9 ds = list_datasets[0]
AttributeError: module 'seagliderOG1.readers' has no attribute 'read_basestation'
<xarray.Dataset> Size: 290kB Dimensions: (sg_data_point: 586, gc_event: 36, gc_state: 10, gps_info: 3, trajectory: 1) Coordinates: ctd_time (sg_data_point) datetime64[ns] 5kB ... longitude (sg_data_point) float64 5kB ... latitude (sg_data_point) float64 5kB ... ctd_depth (sg_data_point) float64 5kB ... * trajectory (trajectory) int32 4B 15 Dimensions without coordinates: sg_data_point, gc_event, gc_state, gps_info Data variables: (12/397) salinity_qc (sg_data_point) |S1 586B ... eng_depth (sg_data_point) float64 5kB ... eng_aa4330_Temp (sg_data_point) float64 5kB ... eng_vbdCC (sg_data_point) float64 5kB ... north_displacement (sg_data_point) float64 5kB ... east_displacement (sg_data_point) float64 5kB ... ... ... sg_cal_optode_FoilCoefB0 float64 8B ... log_MISSION int32 4B ... log_FG_AHR_24V float64 8B ... log_ICE_FREEZE_MARGIN float64 8B ... surface_curr_north float64 8B ... log_RAFOS_DEVICE float64 8B ... Attributes: (12/59) quality_control_version: 1.12 base_station_micro_version: 4935 time_coverage_resolution: PT1S geospatial_vertical_max: 511.8935394557861 sea_name: Caribbean Sea mission: 1 ... ... source: Deepglider SG033 geospatial_vertical_min: 0.2601223952664329 creator_url: geospatial_vertical_units: meter dive_number: 15 disclaimer: Data has not been reviewed and is provid...
Load datasets from a remote directory (URL)
# Specify the server where data are located
server = ""
# Load and concatenate all datasets from the server, optionally specifying the range of profiles to load
list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
AttributeError Traceback (most recent call last)
Cell In[6], line 5
2 server = ""
4 # Load and concatenate all datasets from the server, optionally specifying the range of profiles to load
----> 5 list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
AttributeError: module 'seagliderOG1.readers' has no attribute 'read_basestation'
Convert to OG1 format
For one basestation dataset, split the dataset by dimension (
)Transform into OG1 format: dataset with dims
Change the dimension to
Rename variables according to
Assign variable attributes according to
. (Note: This could go wrong since it makes assumptions about the input variables. May need additional handling.)
Add missing mandatory variables:
(Note: presently TIME_GPS is stripped before saving, but TIME values contain TIME_GPS.)
which is positive up
Update attributes for the file.
from original attributes into contributor
based on institution
Reformats time in
according to OceanGliders format
Retains
as OG attributes
Retains extra attributes:
Future behaviour to be added:
Retain the variables starting with
and check whether they vary over the mission (they shouldn’t)
Add sensors, using information in the
with no dimensionsNeed (from sg_cal_constants:
)Maybe also
(which will change with position),log_D_FLARE
also with_gsm
also with_gsm
- means sensorsg_cal_mission_title
means ??hdm_qc
Convert a single (sample) dataset
# Load one sample dataset
ds = readers.load_sample_dataset()
ds_OG1 = convertOG1.convert_to_OG1(ds)
# Check the results - uncomment the following lines to either generate a plot or show the variables.
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/ FutureWarning: In a future version of xarray decode_timedelta will default to False rather than None. To silence this warning, set decode_timedelta to True, False, or a 'CFTimedeltaCoder' instance.
return xr.open_dataset(file_path)
NameError Traceback (most recent call last)
Cell In[7], line 4
1 # Loads one dataset (
2 ds = readers.load_sample_dataset()
----> 4 ds_OG1 = convertOG1.convert_to_OG1(ds)
6 # Check the results - uncomment the following lines to either generate a plot or show the variables.
7 plotters.plot_profile_depth(ds_OG1)
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/, in convert_to_OG1(list_of_datasets, contrib_to_append)
35 for ds1_base in list_of_datasets:
36 varlist = list(set(varlist + list(ds1_base.variables)))
---> 37 ds_new, attr_warnings, sg_cal, dc_other, dc_log = process_dataset(ds1_base, firstrun)
38 if ds_new:
39 processed_datasets.append(ds_new)
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/, in process_dataset(ds1_base, firstrun)
172 divenum = ds1_base.attrs['dive_number']
173 # Split the dataset by unique dimensions
--> 174 split_ds = tools.split_by_unique_dims(ds1)
175 ds_sgdatapoint = split_ds[('sg_data_point',)]
176 # Extract the gps_info from the split dataset
NameError: name 'ds1' is not defined
# Print to screen a table of the variables and variable attributes
NameError Traceback (most recent call last)
Cell In[8], line 3
1 # Print to screen a table of the variables and variable attributes
2 #plotters.show_attributes(ds_OG1)
----> 3 ds_OG1
NameError: name 'ds_OG1' is not defined
Convert mission from a local directory of basestation files
For local data in the directory input_dir, this loads the basestation files, converts them to OG1 format, and creates a plot of ctd_depth against ctd_time.
# Specify the input directory on your local machine
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
# Load and concatenate all datasets in the input directory
# Optionally, specify the range of profiles to load (start_profile, end_profile)
list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
# Convert the list of datasets to OG1
ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
# Generate a simple plot
AttributeError Traceback (most recent call last)
Cell In[9], line 6
2 input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
4 # Load and concatenate all datasets in the input directory
5 # Optionally, specify the range of profiles to load (start_profile, end_profile)
----> 6 list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
8 # Convert the list of datasets to OG1
9 ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
AttributeError: module 'seagliderOG1.readers' has no attribute 'read_basestation'
Convert mission from the NCEI server (with p*nc files)
Data from the sg015 mission in the Labrador Sea (dataset identifier gov.noaa.nodc:0111844).
# Specify the server where data are located
server = ""
# Load and concatenate all datasets from the server, optionally specifying the range of profiles to load
list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
# Convert the list of datasets to OG1
ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
AttributeError Traceback (most recent call last)
Cell In[10], line 5
2 server = ""
4 # Load and concatenate all datasets from the server, optionally specifying the range of profiles to load
----> 5 list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
7 # Convert the list of datasets to OG1
8 ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
AttributeError: module 'seagliderOG1.readers' has no attribute 'read_basestation'
Saving data
Due to problems with writing xarray datasets as netCDF when attributes are not of a supported type (str, Number, np.ndarray, np.number, list, tuple), the function writers.save_dataset was written.
# Write the file
# This writer catches errors in data types (DType errors) when using xr.to_netcdf()
# The solution is to convert them to strings, which may be undesired behaviour
output_file = os.path.join(data_path, '')
if os.path.exists(output_file):
writers.save_dataset(ds_OG1, output_file);
NameError Traceback (most recent call last)
Cell In[11], line 8
5 if os.path.exists(output_file):
6 os.remove(output_file)
----> 8 writers.save_dataset(ds_OG1, output_file);
NameError: name 'ds_OG1' is not defined
# Load the data saved
ds1 = xr.open_dataset(output_file)
# Generate a simple plot
plotters.plot_depth_colored(ds1, color_by='PROFILE_NUMBER')
KeyError Traceback (most recent call last)
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
210 try:
--> 211 file = self._cache[self._key]
212 except KeyError:
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in LRUCache.__getitem__(self, key)
55 with self._lock:
---> 56 value = self._cache[key]
57 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/home/runner/work/seagliderOG1/seagliderOG1/data/',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '2174b3b0-d56a-4819-a507-d7b74ddfa984']
During handling of the above exception, another exception occurred:
FileNotFoundError Traceback (most recent call last)
Cell In[12], line 2
1 # Load the data saved
----> 2 ds1 = xr.open_dataset(output_file)
4 # Generate a simple plot
5 #plotters.show_contents(ds_all,'attrs')
6 plotters.plot_depth_colored(ds1, color_by='PROFILE_NUMBER')
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
674 decoders = _resolve_decoders_kwargs(
675 decode_cf,
676 open_backend_dataset_parameters=backend.open_dataset_parameters,
682 decode_coords=decode_coords,
683 )
685 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 686 backend_ds = backend.open_dataset(
687 filename_or_obj,
688 drop_variables=drop_variables,
689 **decoders,
690 **kwargs,
691 )
692 ds = _dataset_from_backend_dataset(
693 backend_ds,
694 filename_or_obj,
704 **kwargs,
705 )
706 return ds
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, auto_complex, lock, autoclose)
644 def open_dataset(
645 self,
646 filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
663 autoclose=False,
664 ) -> Dataset:
665 filename_or_obj = _normalize_path(filename_or_obj)
--> 666 store =
667 filename_or_obj,
668 mode=mode,
669 format=format,
670 group=group,
671 clobber=clobber,
672 diskless=diskless,
673 persist=persist,
674 auto_complex=auto_complex,
675 lock=lock,
676 autoclose=autoclose,
677 )
679 store_entrypoint = StoreBackendEntrypoint()
680 with close_on_error(store):
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in, filename, mode, format, group, clobber, diskless, persist, auto_complex, lock, lock_maker, autoclose)
448 kwargs["auto_complex"] = auto_complex
449 manager = CachingFileManager(
450 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
451 )
--> 452 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
391 self._group = group
392 self._mode = mode
--> 393 self.format = self.ds.data_model
394 self._filename = self.ds.filepath()
395 self.is_remote = is_remote_uri(self._filename)
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in NetCDF4DataStore.ds(self)
459 @property
460 def ds(self):
--> 461 return self._acquire()
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in NetCDF4DataStore._acquire(self, needs_lock)
454 def _acquire(self, needs_lock=True):
--> 455 with self._manager.acquire_context(needs_lock) as root:
456 ds = _nc4_require_group(root, self._group, self._mode)
457 return ds
File ~/micromamba/envs/TEST/lib/python3.12/, in _GeneratorContextManager.__enter__(self)
135 del self.args, self.kwds, self.func
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
139 raise RuntimeError("generator didn't yield") from None
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in CachingFileManager.acquire_context(self, needs_lock)
196 @contextlib.contextmanager
197 def acquire_context(self, needs_lock=True):
198 """Context manager for acquiring a file."""
--> 199 file, cached = self._acquire_with_cache_info(needs_lock)
200 try:
201 yield file
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/xarray/backends/, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
215 kwargs = kwargs.copy()
216 kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
218 if self._mode == "w":
219 # ensure file doesn't get overridden when opened again
220 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2521, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:2158, in netCDF4._netCDF4._ensure_nc_success()
FileNotFoundError: [Errno 2] No such file or directory: '/home/runner/work/seagliderOG1/seagliderOG1/data/'
Run multiple missions
# Add these to existing attributes - update to your details
contrib_to_append = vocabularies.contrib_to_append
{'contributor_name': 'Eleanor Frajka-Williams', 'contributor_email': '', 'contributor_role': 'Data scientist', 'contributor_role_vocabulary': '', 'contributing_institutions': 'University of Hamburg - Institute of Oceanography', 'contributing_institutions_vocabulary': '', 'contributing_institutions_role': 'Data scientist', 'contributing_institutions_role_vocabulary': ''}
# Specify a list of servers or local directories
input_locations = [
# Either Iceland, Faroes or RAPID/MOCHA
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
#"", # done
# Labrador Sea
for input_loc in input_locations:
# Example usage
ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True, run_quietly=True)
KeyError Traceback (most recent call last)
Cell In[14], line 33
2 input_locations = [
3 # Either Iceland, Faroes or RAPID/MOCHA
4 #"", # done
28 #"",
29 ]
31 for input_loc in input_locations:
32 # Example usage
---> 33 ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True, run_quietly=True)
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/, in process_and_save_data(input_location, save, output_dir, run_quietly)
584 """
585 Processes and saves data from the specified input location.
586 This function loads and concatenates datasets from the server, converts them to OG1 format,
596 xarray.Dataset: The processed dataset.
597 """
599 # Load and concatenate all datasets from the server
--> 600 ds1_base = readers.load_first_basestation_file(input_location)
602 # Convert the list of datasets to OG1
603 ds1_og1, varlist = convert_to_OG1(ds1_base)
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/, in load_first_basestation_file(source)
159 filename = file_list[0]
160 start_profile = _profnum_from_filename(filename)
--> 161 datasets = load_basestation_files(source, start_profile, start_profile)
162 return datasets[0]
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/, in load_basestation_files(source, start_profile, end_profile)
181 for file in filtered_files:
182 if source.startswith("http://") or source.startswith("https://"):
--> 183 ds = load_sample_dataset(file)
184 else:
185 ds = xr.open_dataset(os.path.join(source, file))
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/, in load_sample_dataset(dataset_name)
59 else:
60 msg = f"Requested sample dataset {dataset_name} not known. Specify one of the following available datasets: {list(data_source_og.registry.keys())}"
---> 61 raise KeyError(msg)
KeyError: "Requested sample dataset not known. Specify one of the following available datasets: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']"
[ ]: