%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import os
import subprocess
import requests
from pystac_client import Client
import boto3
import s3fs
import pandas as pd
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray
import geopandas
import pyproj
from pyproj import Proj
from shapely.ops import transform
import geoviews as gv
from cartopy import crs
import hvplot.xarray
import holoviews as hv
# Register plotting backends for GeoViews (bokeh for interactive, matplotlib for static).
gv.extension('bokeh', 'matplotlib')
Direct S3 Data Access: NetCDF - Daymet v4 Daily TMAX Example
Import Required Packages
# Endpoints that exchange Earthdata Login credentials for temporary S3 tokens,
# keyed by NASA DAAC (cloud data provider).
s3_cred_endpoint = {
    'podaac': 'https://archive.podaac.earthdata.nasa.gov/s3credentials',
    'lpdaac': 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
    'ornldaac': 'https://data.ornldaac.earthdata.nasa.gov/s3credentials'
}


def get_temp_creds():
    """Request temporary S3 credentials from the ORNL DAAC endpoint.

    Returns:
        dict: the parsed JSON response; it carries 'accessKeyId',
        'secretAccessKey', and 'sessionToken', which are consumed by the
        S3FileSystem construction below.
    """
    temp_creds_url = s3_cred_endpoint['ornldaac']
    return requests.get(temp_creds_url).json()
# Fetch temporary credentials once; reused for all S3 access below.
temp_creds_req = get_temp_creds()
Connect to NASA’s ORNL Cloud Provider
# Open the CMR STAC catalog for NASA's ORNL DAAC cloud provider.
ornldaac_cat = Client.open('https://cmr.earthdata.nasa.gov/stac/ORNL_CLOUD/')
Search by collections and by datetime. Daymet Daily tmax data are in netCDF format, with one netCDF file containing one year's worth of daily data (i.e., 365 time steps).
# Search the Daymet Daily V4 collection for items in the 2015-2020 range.
# Each yearly netCDF holds one year of daily time steps.
search = ornldaac_cat.search(
    collections=['Daymet_Daily_V4_1840.v4.2'],
    datetime='2015/2020'
)
Each STAC item returned contains 21 netCDF files. The files represent weather variables for North America, Puerto Rico, and Hawaii.
# Number of STAC items matched by the search (126 when this notebook was run).
search.matched()
# Materialize all matched STAC items from the paged search results.
items = search.get_all_items()
# list(items)
Filter the items returned for North America and by tmax
# Keep only the North America ('_na_') daily-maximum-temperature ('_tmax_')
# granules, taking the first asset's HTTPS URL from each matching item.
https_urls = [list(i.assets.values())[0].href for i in items if '_na_' in i.id and '_tmax_' in i.id]
https_urls
['https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2015.nc',
'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2016.nc',
'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2017.nc',
'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2018.nc',
'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2019.nc',
'https://data.ornldaac.earthdata.nasa.gov/protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2020.nc']
Substitute a portion of each HTTPS URL to create the corresponding S3 location.
# Rewrite each HTTPS download URL to its equivalent direct-S3 object location.
s3_urls = [x.replace('https://data.ornldaac.earthdata.nasa.gov/protected/', 's3://ornl-cumulus-prod-protected/') for x in https_urls]
s3_urls
['s3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2015.nc',
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2016.nc',
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2017.nc',
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2018.nc',
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2019.nc',
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2020.nc']
Single file in-region direct S3 access of netcdf file
# S3 filesystem authenticated with the temporary Earthdata credentials
# obtained above (key/secret/token come from the credentials endpoint).
fs_s3 = s3fs.S3FileSystem(
    anon=False,
    key=temp_creds_req['accessKeyId'],
    secret=temp_creds_req['secretAccessKey'],
    token=temp_creds_req['sessionToken'],
)
# Start with the first yearly file (2015) for single-file access.
s3_url = s3_urls[0]
s3_url
's3://ornl-cumulus-prod-protected/daymet/Daymet_Daily_V4/data/daymet_v4_daily_na_tmax_2015.nc'
# Open the remote netCDF object as a read-only binary file-like object.
s3_file_obj = fs_s3.open(s3_url, mode='rb')
# Lazily open the netCDF with dask chunking via the h5netcdf engine.
xr_ds = xr.open_dataset(s3_file_obj, chunks='auto', engine='h5netcdf')
xr_ds
<xarray.Dataset> Dimensions: (x: 7814, y: 8075, time: 365, nv: 2) Coordinates: * x (x) float32 -4.56e+06 -4.559e+06 ... 3.253e+06 * y (y) float32 4.984e+06 4.983e+06 ... -3.09e+06 lat (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray> lon (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray> * time (time) datetime64[ns] 2015-01-01T12:00:00 ... 20... Dimensions without coordinates: nv Data variables: yearday (time) int16 dask.array<chunksize=(365,), meta=np.ndarray> time_bnds (time, nv) datetime64[ns] dask.array<chunksize=(365, 2), meta=np.ndarray> lambert_conformal_conic int16 ... tmax (time, y, x) float32 dask.array<chunksize=(55, 475, 1282), meta=np.ndarray> Attributes: start_year: 2015 source: Daymet Software Version 4.0 Version_software: Daymet Software Version 4.0 Version_data: Daymet Data Version 4.0 Conventions: CF-1.6 citation: Please see http://daymet.ornl.gov/ for current Daymet ... references: Please see http://daymet.ornl.gov/ for current informa...
Multi-file in-region direct S3 access of netcdf files
# Iterate through the remote S3 URLs to create a fileset of open file objects.
fileset = [fs_s3.open(file) for file in s3_urls]
# Open all yearly files as one dataset concatenated along time.
# NOTE(review): if re-running this raises a context-manager error, re-execute
# the fileset cell above first, then re-run this cell.
xr_ts = xr.open_mfdataset(fileset, chunks='auto', engine='h5netcdf')
xr_ts
<xarray.Dataset> Dimensions: (x: 7814, y: 8075, time: 2190, nv: 2) Coordinates: * x (x) float32 -4.56e+06 -4.559e+06 ... 3.253e+06 * y (y) float32 4.984e+06 4.983e+06 ... -3.09e+06 lat (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray> lon (y, x) float32 dask.array<chunksize=(8075, 3907), meta=np.ndarray> * time (time) datetime64[ns] 2015-01-01T12:00:00 ... 20... Dimensions without coordinates: nv Data variables: yearday (time) int16 dask.array<chunksize=(365,), meta=np.ndarray> time_bnds (time, nv) datetime64[ns] dask.array<chunksize=(365, 2), meta=np.ndarray> lambert_conformal_conic (time) int16 -32767 -32767 -32767 ... -32767 -32767 tmax (time, y, x) float32 dask.array<chunksize=(55, 475, 1282), meta=np.ndarray> Attributes: start_year: 2015 source: Daymet Software Version 4.0 Version_software: Daymet Software Version 4.0 Version_data: Daymet Data Version 4.0 Conventions: CF-1.6 citation: Please see http://daymet.ornl.gov/ for current Daymet ... references: Please see http://daymet.ornl.gov/ for current informa...
#xr_ts.SSH.hvplot.image()