seagliderOG1 demo
The purpose of this notebook is to demonstrate the functionality of seagliderOG1
to convert from Seaglider basestation files to OG1 format.
OG1 format is a newly agreed format (since June 2024) for glider data sets from various platforms (e.g., Seaglider, Slocum, Seaexplorer). It lives on github here: (https://github.com/OceanGlidersCommunity/OG-format-user-manual).
OG1 manual: https://oceangliderscommunity.github.io/OG-format-user-manual/OG_Format.html
The test case is to convert sg015 data from the Labrador Sea in September 2004.
The demo is organised to show
- Conversion of a single dive cycle (single `p*.nc` file)
- Conversion for a folder of local dive-cycle files (full mission of `p*.nc` files)
- Download from remote server + conversion (directory with full mission of `p*.nc` files)
Options are provided to only load e.g. 10 files, but note that OG1 format expects a full mission.
[1]:
# Standard-library and third-party imports, grouped in one place
import os
import pathlib
import sys

import pooch
import xarray as xr

# Make the in-repository package importable without installing it.
# `pathlib.Path()` is the kernel's current working directory, so `parent_dir`
# is the directory one level above it (this assumes the notebook is run from
# a direct sub-folder of the repository root).
script_dir = pathlib.Path().parent.absolute()
parent_dir = script_dir.parents[0]
sys.path.append(str(parent_dir))
# Join with pathlib rather than concatenating '/' by hand
sys.path.append(str(parent_dir / 'seagliderOG1'))
print(parent_dir)
print(sys.path)

# Local package imports -- these must come after the sys.path additions above
from seagliderOG1 import readers, writers, plotters, tools
from seagliderOG1 import convertOG1, vocabularies
/home/runner/work/seagliderOG1/seagliderOG1
['/home/runner/micromamba/envs/TEST/lib/python312.zip', '/home/runner/micromamba/envs/TEST/lib/python3.12', '/home/runner/micromamba/envs/TEST/lib/python3.12/lib-dynload', '', '/home/runner/micromamba/envs/TEST/lib/python3.12/site-packages', '/home/runner/work/seagliderOG1/seagliderOG1', '/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1']
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:472: SyntaxWarning: invalid escape sequence '\d'
fill_val = 2 ** (int(re.findall("\d+", str(new_dtype))[0]) - 1) - 1
[ ]:
[2]:
# Specify the path for writing datafiles:
# a 'data' folder directly under `parent_dir` (defined in the setup cell)
data_path = os.path.join(parent_dir, 'data')
Reading basestation files
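There are three ways to load a glider dataset:
- Load an example dataset using `seagliderOG1.readers.load_sample_dataset`
- Load your own files from a local directory using `seagliderOG1.readers.read_basestation` (or open a single file with e.g. `ds = xr.open_dataset('/path/to/yourfile.nc')`)
- Load files from a remote directory (URL) using `seagliderOG1.readers.read_basestation`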
Load single sample dataset
[3]:
# Fetch one dive-cycle file and open it as a dataset.
# The file name is passed explicitly: the default of load_sample_dataset()
# ('p0040034_20031007.nc') is requested from the sg015 mission directory,
# where it does not exist (HTTP 404). 'p0150500_20050213.nc' is a file of the
# sg015 mission used throughout this demo.
ds = readers.load_sample_dataset(dataset_name="p0150500_20050213.nc")
ds
Downloading file 'p0040034_20031007.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc' to '/home/runner/.cache/seagliderOG1'.
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
Cell In[3], line 1
----> 1 ds = readers.load_sample_dataset()
2 ds
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:37, in load_sample_dataset(dataset_name)
35 def load_sample_dataset(dataset_name="p0040034_20031007.nc"):
36 if dataset_name in data_source_og.registry.keys():
---> 37 file_path = data_source_og.fetch(dataset_name)
38 return xr.open_dataset(file_path)
39 else:
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
586 if downloader is None:
587 downloader = choose_downloader(url, progressbar=progressbar)
--> 589 stream_download(
590 url,
591 full_path,
592 known_hash,
593 downloader,
594 pooch=self,
595 retry_if_failed=self.retry_if_failed,
596 )
598 if processor is not None:
599 return processor(str(full_path), action, self)
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
803 try:
804 # Stream the file to a temporary so that we can safely check its
805 # hash before overwriting the original.
806 with temporary_file(path=str(fname.parent)) as tmp:
--> 807 downloader(url, tmp, pooch)
808 hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
809 shutil.move(tmp, str(fname))
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/downloaders.py:221, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
219 try:
220 response = requests.get(url, timeout=timeout, **kwargs)
--> 221 response.raise_for_status()
222 content = response.iter_content(chunk_size=self.chunk_size)
223 total = int(response.headers.get("content-length", 0))
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
1019 http_error_msg = (
1020 f"{self.status_code} Server Error: {reason} for url: {self.url}"
1021 )
1023 if http_error_msg:
-> 1024 raise HTTPError(http_error_msg, response=self)
HTTPError: 404 Client Error: Not Found for url: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc
Load datasets from a local directory
[4]:
# Specify the input directory on your local machine.
# NOTE: this is an example path -- edit it to point at your own folder of p*.nc files.
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
if os.path.isdir(input_dir):
    # Load all datasets in the input directory
    # Optionally, specify the range of profiles to load (start_profile, end_profile)
    list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
    # Where list_datasets is a list of xarray datasets. A single dataset can be accessed as
    ds = list_datasets[0]
else:
    # read_basestation raises ValueError when the folder is absent, so skip
    # this example with a message instead of aborting the notebook run.
    print(f"Skipping local-directory example: '{input_dir}' not found. Edit input_dir to use your own data.")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[4], line 6
2 input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
4 # Load and concatenate all datasets in the input directory
5 # Optionally, specify the range of profiles to load (start_profile, end_profile)
----> 6 list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
8 # Where list_datasets is a list of xarray datasets. A single dataset can be accessed as
9 ds = list_datasets[0]
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:101, in read_basestation(source, start_profile, end_profile)
99 file_list = os.listdir(source)
100 else:
--> 101 raise ValueError("Source must be a valid URL or directory path.")
103 filtered_files = filter_files_by_profile(file_list, start_profile, end_profile)
105 datasets = []
ValueError: Source must be a valid URL or directory path.
[5]:
# Show the most recently loaded dataset `ds` as the cell output
ds
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 ds
NameError: name 'ds' is not defined
Load datasets from a remote directory (URL)
[6]:
# Specify the server where data are located (a URL ending in '/')
server = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/"
# Load the datasets from the server, optionally specifying the range of profiles to load.
# Here only profiles 500 to 503 are requested to keep the download small.
list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
Downloading file 'p0150500_20050213.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150500_20050213.nc' to '/home/runner/.cache/seagliderOG1'.
Downloading file 'p0150501_20050213.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150501_20050213.nc' to '/home/runner/.cache/seagliderOG1'.
Downloading file 'p0150502_20050214.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150502_20050214.nc' to '/home/runner/.cache/seagliderOG1'.
Downloading file 'p0150503_20050214.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0150503_20050214.nc' to '/home/runner/.cache/seagliderOG1'.
Convert to OG1 format
Process:
- For one basestation dataset, split the dataset by dimension (`split_ds`)
- Transform into OG1 format: dataset with dims `sg_data_point`
  - Change the dimension to `N_MEASUREMENTS`
  - Rename variables according to `vocabularies.standard_names`
  - Assign variable attributes according to `vocabularies.vocab_attrs`. (Note: This could go wrong since it makes assumptions about the input variables. May need additional handling.)
- Add missing mandatory variables:
  - From `split_ds[(gps_info,)]`, add the `LATITUDE_GPS`, `LONGITUDE_GPS` and `TIME_GPS` (Note: presently `TIME_GPS` is stripped before saving, but `TIME` values contain `TIME_GPS`)
  - Create `PROFILE_NUMBER` and `PHASE`
  - Calculate `DEPTH_Z` which is positive up
- Update attributes for the file.
  - Combines `creator` and `contributor` from original attributes into `contributor`
  - Adds `contributing_institutions` based on `institution`
  - Reformats time in `time_coverage_*` and `start_time` --> `start_date`
  - Adds `date_modified`
  - Renames `comments` --> `history`, `site` --> `summary`
  - Adds `title`, `platform`, `platform_vocabulary`, `featureType`, `Conventions`, `rtqc_method*` according to OceanGliders format
  - Retains `naming_authority`, `institution`, `project`, `geospatial_*` as OG attributes
  - Retains extra attributes: `license`, `keywords`, `keywords_vocabulary`, `file_version`, `acknowledgement`, `date_created`, `disclaimer`
Future behaviour to be added:
- Retain the variables starting with `sg_cal` and check whether they vary over the mission (shouldn't)
- Add sensors, using information in the `split_ds` with no dimensions
  - Need (from sg_cal_constants: `sg_cal` plus `volmax`, `vbd_cnts_per_cc`, `therm_expan`, `t_*`, `mass`, `hd_*`, `ctcor`, `cpcor`, `c_*`, `abs_compress`, `a`, `Tcor`, `Soc`, `Pcor`, `Foffset`)
  - Maybe also `reviewed`, `magnetic_variation` (which will change with position), `log_D_FLARE`, `flight_avg_speed_north` and `flight_avg_speed_east` also with `_gsm`, `depth_avg_curr_north` and `depth_avg_curr_east` also with `_gsm`, `wlbb2f` - means sensor
  - `sg_cal_mission_title`
  - `sg_cal_id_str`
  - `calibcomm_oxygen`
  - `calibcomm`
  - `sbe41` means ??
  - `hdm_qc`
  - `glider`
Convert a single (sample) dataset
[7]:
# Load one dive-cycle dataset (p0150500_20050213.nc).
# The name is given explicitly because the default sample file of
# load_sample_dataset() is not available from the sg015 directory (HTTP 404).
ds = readers.load_sample_dataset(dataset_name="p0150500_20050213.nc")

# Convert the single basestation dataset to OG1 format
ds_OG1 = convertOG1.convert_to_OG1(ds)

# Check the results with a simple plot
plotters.plot_profile_depth(ds_OG1)
Downloading file 'p0040034_20031007.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc' to '/home/runner/.cache/seagliderOG1'.
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
Cell In[7], line 2
1 # Loads one dataset (p0150500_20050213.nc)
----> 2 ds = readers.load_sample_dataset()
4 ds_OG1 = convertOG1.convert_to_OG1(ds)
6 # Check the results - uncomment the following lines to either generate a plot or show the variables.
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:37, in load_sample_dataset(dataset_name)
35 def load_sample_dataset(dataset_name="p0040034_20031007.nc"):
36 if dataset_name in data_source_og.registry.keys():
---> 37 file_path = data_source_og.fetch(dataset_name)
38 return xr.open_dataset(file_path)
39 else:
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
586 if downloader is None:
587 downloader = choose_downloader(url, progressbar=progressbar)
--> 589 stream_download(
590 url,
591 full_path,
592 known_hash,
593 downloader,
594 pooch=self,
595 retry_if_failed=self.retry_if_failed,
596 )
598 if processor is not None:
599 return processor(str(full_path), action, self)
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
803 try:
804 # Stream the file to a temporary so that we can safely check its
805 # hash before overwriting the original.
806 with temporary_file(path=str(fname.parent)) as tmp:
--> 807 downloader(url, tmp, pooch)
808 hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
809 shutil.move(tmp, str(fname))
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/downloaders.py:221, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
219 try:
220 response = requests.get(url, timeout=timeout, **kwargs)
--> 221 response.raise_for_status()
222 content = response.iter_content(chunk_size=self.chunk_size)
223 total = int(response.headers.get("content-length", 0))
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
1019 http_error_msg = (
1020 f"{self.status_code} Server Error: {reason} for url: {self.url}"
1021 )
1023 if http_error_msg:
-> 1024 raise HTTPError(http_error_msg, response=self)
HTTPError: 404 Client Error: Not Found for url: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0040034_20031007.nc
[8]:
# Print to screen a table of the variables and variable attributes
# (optional alternative view; uncomment to use)
#plotters.show_attributes(ds_OG1)
# Show the converted dataset as the cell output
ds_OG1
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[8], line 3
1 # Print to screen a table of the variables and variable attributes
2 #plotters.show_attributes(ds_OG1)
----> 3 ds_OG1
NameError: name 'ds_OG1' is not defined
Convert mission from a local directory of basestation files
For local data in the directory `input_dir`, this converts the selected dive cycles to OG1 and creates a plot of ctd_depth against ctd_time.
[9]:
# Specify the input directory on your local machine.
# NOTE: this is an example path -- edit it to point at your own folder of p*.nc files.
input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
if os.path.isdir(input_dir):
    # Load all datasets in the input directory
    # Optionally, specify the range of profiles to load (start_profile, end_profile)
    list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
    # Convert the list of datasets to OG1
    ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
    # Generate a simple plot
    plotters.plot_profile_depth(ds_OG1)
    # Optional: inspect the attributes of the converted dataset
    #plotters.show_contents(ds_OG1,'attrs')
else:
    # read_basestation raises ValueError when the folder is absent, so skip
    # this example with a message instead of aborting the notebook run.
    print(f"Skipping local-directory conversion: '{input_dir}' not found. Edit input_dir to use your own data.")
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[9], line 6
2 input_dir = "/Users/eddifying/Dropbox/data/sg015-ncei-download"
4 # Load and concatenate all datasets in the input directory
5 # Optionally, specify the range of profiles to load (start_profile, end_profile)
----> 6 list_datasets = readers.read_basestation(input_dir, start_profile=500, end_profile=503)
8 # Convert the list of datasets to OG1
9 ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:101, in read_basestation(source, start_profile, end_profile)
99 file_list = os.listdir(source)
100 else:
--> 101 raise ValueError("Source must be a valid URL or directory path.")
103 filtered_files = filter_files_by_profile(file_list, start_profile, end_profile)
105 datasets = []
ValueError: Source must be a valid URL or directory path.
Convert mission from the NCEI server (with `p*.nc` files)
Data from the sg015 mission in the Labrador Sea (https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:0111844), dataset identifier gov.noaa.nodc:0111844.
[10]:
# Specify the server where data are located (a URL ending in '/')
server = "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/"
# Load the datasets from the server, optionally specifying the range of profiles to load.
# Here only profiles 500 to 503 are requested; OG1 expects a full mission, so
# drop the start_profile/end_profile arguments for a real conversion.
list_datasets = readers.read_basestation(server, start_profile=500, end_profile=503)
# Convert the list of datasets to a single OG1 dataset
ds_OG1 = convertOG1.convert_to_OG1(list_datasets)
Variable 'vert_speed_gsm' not in OG1 vocabulary.
Variable 'time' not in OG1 vocabulary.
Variable 'speed_gsm' not in OG1 vocabulary.
Variable 'sound_velocity' not in OG1 vocabulary.
Variable 'north_displacement_gsm' not in OG1 vocabulary.
Variable 'longitude_gsm' not in OG1 vocabulary.
Variable 'latitude_gsm' not in OG1 vocabulary.
Variable 'horz_speed_gsm' not in OG1 vocabulary.
Variable 'glide_angle_gsm' not in OG1 vocabulary.
Variable 'eng_wlbb2f_VFtemp' not in OG1 vocabulary.
Variable 'eng_sbect_tempFreq' not in OG1 vocabulary.
Variable 'eng_sbect_condFreq' not in OG1 vocabulary.
Variable 'eng_elaps_t_0000' not in OG1 vocabulary.
Variable 'eng_elaps_t' not in OG1 vocabulary.
Variable 'eng_depth' not in OG1 vocabulary.
Variable 'east_displacement_gsm' not in OG1 vocabulary.
Variable 'depth' not in OG1 vocabulary.
Variable 'density' not in OG1 vocabulary.
Variable 'buoyancy' not in OG1 vocabulary.
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:359: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
return ds.assign(divenum=('N_MEASUREMENTS', [dive_number] * ds.dims['N_MEASUREMENTS']))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:178: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['dive_num_cast'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:239: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:241: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE_QC'] = (['N_MEASUREMENTS'], np.zeros(ds.dims['N_MEASUREMENTS'], dtype=int))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:292: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
No conversion information found for cm s-1 to degrees
No conversion information found for micromoles/kg to percent
wlbb2f
Adding sensor: SENSOR_FLUOROMETERS
sbe41
sbe41
SBE t12/c12 calibration 30DEC03
30DEC03
Unknown
Variable 'vert_speed_gsm' not in OG1 vocabulary.
Variable 'time' not in OG1 vocabulary.
Variable 'speed_gsm' not in OG1 vocabulary.
Variable 'sound_velocity' not in OG1 vocabulary.
Variable 'north_displacement_gsm' not in OG1 vocabulary.
Variable 'longitude_gsm' not in OG1 vocabulary.
Variable 'latitude_gsm' not in OG1 vocabulary.
Variable 'horz_speed_gsm' not in OG1 vocabulary.
Variable 'glide_angle_gsm' not in OG1 vocabulary.
Variable 'eng_wlbb2f_VFtemp' not in OG1 vocabulary.
Variable 'eng_sbect_tempFreq' not in OG1 vocabulary.
Variable 'eng_sbect_condFreq' not in OG1 vocabulary.
Variable 'eng_elaps_t_0000' not in OG1 vocabulary.
Variable 'eng_elaps_t' not in OG1 vocabulary.
Variable 'eng_depth' not in OG1 vocabulary.
Variable 'east_displacement_gsm' not in OG1 vocabulary.
Variable 'depth' not in OG1 vocabulary.
Variable 'density' not in OG1 vocabulary.
Variable 'buoyancy' not in OG1 vocabulary.
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:359: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
return ds.assign(divenum=('N_MEASUREMENTS', [dive_number] * ds.dims['N_MEASUREMENTS']))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:178: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['dive_num_cast'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:239: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:241: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE_QC'] = (['N_MEASUREMENTS'], np.zeros(ds.dims['N_MEASUREMENTS'], dtype=int))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:292: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
No conversion information found for cm s-1 to degrees
No conversion information found for micromoles/kg to percent
wlbb2f
Adding sensor: SENSOR_FLUOROMETERS
sbe41
sbe41
SBE t12/c12 calibration 30DEC03
30DEC03
Unknown
Variable 'vert_speed_gsm' not in OG1 vocabulary.
Variable 'time' not in OG1 vocabulary.
Variable 'speed_gsm' not in OG1 vocabulary.
Variable 'sound_velocity' not in OG1 vocabulary.
Variable 'north_displacement_gsm' not in OG1 vocabulary.
Variable 'longitude_gsm' not in OG1 vocabulary.
Variable 'latitude_gsm' not in OG1 vocabulary.
Variable 'horz_speed_gsm' not in OG1 vocabulary.
Variable 'glide_angle_gsm' not in OG1 vocabulary.
Variable 'eng_wlbb2f_VFtemp' not in OG1 vocabulary.
Variable 'eng_sbect_tempFreq' not in OG1 vocabulary.
Variable 'eng_sbect_condFreq' not in OG1 vocabulary.
Variable 'eng_elaps_t_0000' not in OG1 vocabulary.
Variable 'eng_elaps_t' not in OG1 vocabulary.
Variable 'eng_depth' not in OG1 vocabulary.
Variable 'east_displacement_gsm' not in OG1 vocabulary.
Variable 'depth' not in OG1 vocabulary.
Variable 'density' not in OG1 vocabulary.
Variable 'buoyancy' not in OG1 vocabulary.
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:359: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
return ds.assign(divenum=('N_MEASUREMENTS', [dive_number] * ds.dims['N_MEASUREMENTS']))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:178: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['dive_num_cast'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:239: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:241: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE_QC'] = (['N_MEASUREMENTS'], np.zeros(ds.dims['N_MEASUREMENTS'], dtype=int))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:292: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
No conversion information found for cm s-1 to degrees
No conversion information found for micromoles/kg to percent
wlbb2f
Adding sensor: SENSOR_FLUOROMETERS
sbe41
sbe41
SBE t12/c12 calibration 30DEC03
30DEC03
Unknown
Variable 'vert_speed_gsm' not in OG1 vocabulary.
Variable 'time' not in OG1 vocabulary.
Variable 'speed_gsm' not in OG1 vocabulary.
Variable 'sound_velocity' not in OG1 vocabulary.
Variable 'north_displacement_gsm' not in OG1 vocabulary.
Variable 'longitude_gsm' not in OG1 vocabulary.
Variable 'latitude_gsm' not in OG1 vocabulary.
Variable 'horz_speed_gsm' not in OG1 vocabulary.
Variable 'glide_angle_gsm' not in OG1 vocabulary.
Variable 'eng_wlbb2f_VFtemp' not in OG1 vocabulary.
Variable 'eng_sbect_tempFreq' not in OG1 vocabulary.
Variable 'eng_sbect_condFreq' not in OG1 vocabulary.
Variable 'eng_elaps_t_0000' not in OG1 vocabulary.
Variable 'eng_elaps_t' not in OG1 vocabulary.
Variable 'eng_depth' not in OG1 vocabulary.
Variable 'east_displacement_gsm' not in OG1 vocabulary.
Variable 'depth' not in OG1 vocabulary.
Variable 'density' not in OG1 vocabulary.
Variable 'buoyancy' not in OG1 vocabulary.
No conversion information found for cm s-1 to degrees
No conversion information found for micromoles/kg to percent
wlbb2f
Adding sensor: SENSOR_FLUOROMETERS
sbe41
sbe41
SBE t12/c12 calibration 30DEC03
30DEC03
Unknown
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:359: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
return ds.assign(divenum=('N_MEASUREMENTS', [dive_number] * ds.dims['N_MEASUREMENTS']))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:178: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['dive_num_cast'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:239: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:241: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['PHASE_QC'] = (['N_MEASUREMENTS'], np.zeros(ds.dims['N_MEASUREMENTS'], dtype=int))
/home/runner/work/seagliderOG1/seagliderOG1/seagliderOG1/tools.py:292: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`.
ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
Saving data
Due to problems with writing xarray datasets as netCDF when attributes are not of a specified type (`str`, `Number`, `np.ndarray`, `np.number`, `list`, `tuple`), a function `save_dataset` was written. It converts the offending attribute values to strings before saving.
[11]:
# Write the file
# This writer catches errors in data types (DType errors) when using xr.to_netcdf()
# The solution is to convert them to strings, which may be undesired behaviour
output_file = os.path.join(data_path, 'demo_test.nc')

# Make sure the output folder exists so the write cannot fail on a fresh checkout
os.makedirs(data_path, exist_ok=True)

# Remove any file left over from a previous run before writing a new one
if os.path.exists(output_file):
    os.remove(output_file)

# The trailing ';' suppresses the return value in the cell output
writers.save_dataset(ds_OG1, output_file);
TypeError Invalid value for attr 'calibration_parameters': {'t_g': 0.00431040083, 't_h': 0.000631750336, 't_i': 2.27137914e-05, 't_j': 2.20168797e-06, 'c_g': -10.2690994, 'c_h': 1.1547191, 'c_i': -0.00143967206, 'c_j': 0.000204972837, 'cpcor': -9.57e-08, 'ctcor': 3.25e-06}. For serialization to netCDF files, its value must be of one of the following types: str, Number, ndarray, number, list, tuple, bytes
variable 'SENSOR_CTD_UNKNOWN': Converting attribute 'calibration_parameters' with value '{'t_g': 0.00431040083, 't_h': 0.000631750336, 't_i': 2.27137914e-05, 't_j': 2.20168797e-06, 'c_g': -10.2690994, 'c_h': 1.1547191, 'c_i': -0.00143967206, 'c_j': 0.000204972837, 'cpcor': -9.57e-08, 'ctcor': 3.25e-06}' to string.
[12]:
# Load the data saved, to check that the written file can be read back
ds1 = xr.open_dataset(output_file)
# Generate a simple plot
# NOTE(review): the commented line below refers to `ds_all`, which is only
# assigned in a later cell -- presumably `ds1` is intended here; confirm.
#plotters.show_contents(ds_all,'attrs')
# Plot the reloaded dataset, coloured by the PROFILE_NUMBER variable
plotters.plot_depth_colored(ds1, color_by='PROFILE_NUMBER')
Run multiple missions
[13]:
# Add these to existing attributes - update to your details
# (the values come from the package's vocabularies module)
contrib_to_append = vocabularies.contrib_to_append
# Print the contributor details so they can be checked before running a conversion
print(contrib_to_append)
{'contributor_name': 'Eleanor Frajka-Williams', 'contributor_email': 'eleanorfrajka@gmail.com', 'contributor_role': 'Data scientist', 'contributor_role_vocabulary': 'http://vocab.nerc.ac.uk/search_nvs/W08', 'contributing_institutions': 'University of Hamburg - Institute of Oceanography', 'contributing_institutions_vocabulary': 'https://edmo.seadatanet.org/report/1156', 'contributing_institutions_role': 'Data scientist', 'contributing_institutions_role_vocabulary': 'http://vocab.nerc.ac.uk/search_nvs/W08'}
[14]:
# Servers or local directories to process, one mission per entry.
# Uncomment the missions you want to run; entries tagged "done" were already processed.
input_locations = [
    # Either Iceland, Faroes or RAPID/MOCHA
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20090829/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20080606/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20081106/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/012/20070831/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080214/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20080222/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20061112/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20090605/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20071113/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20080607/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100518/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100903/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20081108/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20061112/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/101/20070609/", # done
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/102/20061112/", # done
    # Labrador Sea
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/",
    "https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/014/20040924/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/008/20031002/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/004/20031002/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/016/20050406/",
    # RAPID/MOCHA
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/033/20100729/",
    #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/034/20110128/",
]

# Read, convert and save each selected mission in turn; `ds_all` holds the
# dataset returned for the most recently processed location.
# NOTE(review): in the recorded run the sg014 location above was requested but
# the file download was attempted from the uw/015/20040924/ directory and
# failed with HTTP 404 -- confirm that the reader fetches from the location
# it is given.
for input_loc in input_locations:
    ds_all = convertOG1.process_and_save_data(
        input_loc,
        save=True,
        output_dir=data_path,
        run_quietly=True,
    )
Downloading file 'p0140001_20040924.nc' from 'https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0140001_20040924.nc' to '/home/runner/.cache/seagliderOG1'.
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
Cell In[14], line 33
2 input_locations = [
3 # Either Iceland, Faroes or RAPID/MOCHA
4 #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/005/20090829/", # done
(...)
28 #"https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/034/20110128/",
29 ]
31 for input_loc in input_locations:
32 # Example usage
---> 33 ds_all = convertOG1.process_and_save_data(input_loc, output_dir=data_path, save=True, run_quietly=True)
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/convertOG1.py:608, in process_and_save_data(input_location, save, output_dir, run_quietly)
592 """
593 Processes and saves data from the specified input location.
594 This function loads and concatenates datasets from the server, converts them to OG1 format,
(...)
604 xarray.Dataset: The processed dataset.
605 """
607 # Load and concatenate all datasets from the server
--> 608 list_datasets = readers.read_basestation(input_location)
610 # Convert the list of datasets to OG1
611 ds1 = convert_to_OG1(list_datasets[-1])
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:109, in read_basestation(source, start_profile, end_profile)
107 for file in filtered_files:
108 if source.startswith("http://") or source.startswith("https://"):
--> 109 ds = load_sample_dataset(file)
110 else:
111 ds = xr.open_dataset(os.path.join(source, file))
File ~/work/seagliderOG1/seagliderOG1/seagliderOG1/readers.py:37, in load_sample_dataset(dataset_name)
35 def load_sample_dataset(dataset_name="p0040034_20031007.nc"):
36 if dataset_name in data_source_og.registry.keys():
---> 37 file_path = data_source_og.fetch(dataset_name)
38 return xr.open_dataset(file_path)
39 else:
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:589, in Pooch.fetch(self, fname, processor, downloader, progressbar)
586 if downloader is None:
587 downloader = choose_downloader(url, progressbar=progressbar)
--> 589 stream_download(
590 url,
591 full_path,
592 known_hash,
593 downloader,
594 pooch=self,
595 retry_if_failed=self.retry_if_failed,
596 )
598 if processor is not None:
599 return processor(str(full_path), action, self)
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/core.py:807, in stream_download(url, fname, known_hash, downloader, pooch, retry_if_failed)
803 try:
804 # Stream the file to a temporary so that we can safely check its
805 # hash before overwriting the original.
806 with temporary_file(path=str(fname.parent)) as tmp:
--> 807 downloader(url, tmp, pooch)
808 hash_matches(tmp, known_hash, strict=True, source=str(fname.name))
809 shutil.move(tmp, str(fname))
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/pooch/downloaders.py:221, in HTTPDownloader.__call__(self, url, output_file, pooch, check_only)
219 try:
220 response = requests.get(url, timeout=timeout, **kwargs)
--> 221 response.raise_for_status()
222 content = response.iter_content(chunk_size=self.chunk_size)
223 total = int(response.headers.get("content-length", 0))
File ~/micromamba/envs/TEST/lib/python3.12/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
1019 http_error_msg = (
1020 f"{self.status_code} Server Error: {reason} for url: {self.url}"
1021 )
1023 if http_error_msg:
-> 1024 raise HTTPError(http_error_msg, response=self)
HTTPError: 404 Client Error: Not Found for url: https://www.ncei.noaa.gov/data/oceans/glider/seaglider/uw/015/20040924/p0140001_20040924.nc
[ ]: