Direct S3 Data Access: NetCDF - Daymet v4 Daily TMAX Example


Import Required Packages

%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import os
import subprocess
import requests
from pystac_client import Client
import boto3
import s3fs
import pandas as pd
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geopandas
import pyproj
from pyproj import Proj
from shapely.ops import transform
import geoviews as gv
from cartopy import crs
import hvplot.xarray
import holoviews as hv
gv.extension('bokeh', 'matplotlib')
s3_cred_endpoint = {
    'podaac':'https://archive.podaac.earthdata.nasa.gov/s3credentials',
    'lpdaac':'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
    'ornldaac':'https://data.ornldaac.earthdata.nasa.gov/s3credentials'
}
def get_temp_creds():
    temp_creds_url = s3_cred_endpoint['ornldaac']
    return requests.get(temp_creds_url).json()
temp_creds_req = get_temp_creds()

Connect to NASA’s ORNL Cloud Provider

ornldaac_cat = Client.open('https://cmr.earthdata.nasa.gov/stac/ORNL_CLOUD/')

Search by collections and by datetime. Daymet Daily tmax are in netCDF format with 1 netCDF containing 1 years worth of daily data (i.e., 365 time steps)

search = ornldaac_cat.search(
    collections=['Daymet_Daily_V4_1840.v4.2'],
    datetime='2015/2020'
)

Each STAC Item return contains 21 netCDF files. The files represent weather variables for North America, Puerto Rico, and Hawaii

search.matched()
126
items = search.get_all_items()
#list(items)

Filter the items returned for North America and by tmax

https_urls = [list(i.assets.values())[0].href for i in items if '_na_' in i.id and '_tmax_' in i.id]
https_urls
['https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2015.nc',
 'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2016.nc',
 'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2017.nc',
 'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2018.nc',
 'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2019.nc',
 'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2020.nc']

Substitute a portion of the HTTPS url to create the S3 location

s3_urls = [x.replace('https://data.ornldaac.earthdata.nasa.gov/protected/', 's3://ornl-cumulus-prod-protected/') for x in https_urls]
s3_urls
['s3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2015.nc',
 's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2016.nc',
 's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2017.nc',
 's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2018.nc',
 's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2019.nc',
 's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2020.nc']

Single file in-region direct S3 access of netcdf file

fs_s3 = s3fs.S3FileSystem(anon=False, key=temp_creds_req['accessKeyId'], secret=temp_creds_req['secretAccessKey'], token=temp_creds_req['sessionToken'])
s3_url = s3_urls[0]
s3_url
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2015.nc'
s3_file_obj = fs_s3.open(s3_url, mode='rb')
xr_ds = xr.open_dataset(s3_file_obj, chunks='auto', engine='h5netcdf')
xr_ds
<xarray.Dataset>
Dimensions:                  (x: 7814, y: 8075, time: 365, nv: 2)
Coordinates:
  * x                        (x) float32 -4.56e+06 -4.559e+06 ... 3.253e+06
  * y                        (y) float32 4.984e+06 4.983e+06 ... -3.09e+06
    lat                      (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray>
    lon                      (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray>
  * time                     (time) datetime64[ns] 2015-01-01T12:00:00 ... 20...
Dimensions without coordinates: nv
Data variables:
    yearday                  (time) int16 dask.array<chunksize=(365,), meta=np.ndarray>
    time_bnds                (time, nv) datetime64[ns] dask.array<chunksize=(365, 2), meta=np.ndarray>
    lambert_conformal_conic  int16 ...
    tmax                     (time, y, x) float32 dask.array<chunksize=(55, 475, 1282), meta=np.ndarray>
Attributes:
    start_year:        2015
    source:            Daymet Software Version 4.0
    Version_software:  Daymet Software Version 4.0
    Version_data:      Daymet Data Version 4.0
    Conventions:       CF-1.6
    citation:          Please see http://daymet.ornl.gov/ for current Daymet ...
    references:        Please see http://daymet.ornl.gov/ for current informa...

Multi-file in-region direct S3 access of netcdf files

# Iterate through remote_files to create a fileset
fileset = [fs_s3.open(file) for file in s3_urls]
# This works...if you rerun this line and get a context manager error, try 1. rerunning the line above then this line 
xr_ts = xr.open_mfdataset(fileset, chunks='auto', engine='h5netcdf')
xr_ts
<xarray.Dataset>
Dimensions:                  (x: 7814, y: 8075, time: 2190, nv: 2)
Coordinates:
  * x                        (x) float32 -4.56e+06 -4.559e+06 ... 3.253e+06
  * y                        (y) float32 4.984e+06 4.983e+06 ... -3.09e+06
    lat                      (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray>
    lon                      (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray>
  * time                     (time) datetime64[ns] 2015-01-01T12:00:00 ... 20...
Dimensions without coordinates: nv
Data variables:
    yearday                  (time) int16 dask.array<chunksize=(365,), meta=np.ndarray>
    time_bnds                (time, nv) datetime64[ns] dask.array<chunksize=(365, 2), meta=np.ndarray>
    lambert_conformal_conic  (time) int16 -32767 -32767 -32767 ... -32767 -32767
    tmax                     (time, y, x) float32 dask.array<chunksize=(55, 475, 1282), meta=np.ndarray>
Attributes:
    start_year:        2015
    source:            Daymet Software Version 4.0
    Version_software:  Daymet Software Version 4.0
    Version_data:      Daymet Data Version 4.0
    Conventions:       CF-1.6
    citation:          Please see http://daymet.ornl.gov/ for current Daymet ...
    references:        Please see http://daymet.ornl.gov/ for current informa...
#xr_ts.SSH.hvplot.image()