import xarray as xr
import rioxarray
import rasterio as rio
import os
import holoviews as hv
import hvplot.xarray
import hvplot.pandas
import geopandas as gpd
import shapely
from shapely.geometry import box
from shapely.geometry import Point
from pyproj import Transformer
from matplotlib.colors import ListedColormap
#import rasterstats
import numpy as np
import seaborn as sns
from datetime import datetime
from datetime import timedelta
from os.path import basename
import xarray
import pandas as pd
from scipy.stats import zscore
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
!mamba install rasterstats -q -y
import rasterstats
= 1080
FIG_WIDTH_PX = 720
FIG_HEIGHT_PX = 16
FIG_WIDTH_IN = 9 FIG_HEIGHT_IN
= '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L3T_ET_PT-JPL_12653_004_10SGD_20200928T224329_0700_01/ECOv002_L3T_ET_PT-JPL_12653_004_10SGD_20200928T224329_0700_01_ETdaily.tif'
ET_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L2T_LSTE_12653_004_10SGD_20200928T224329_0700_01/ECOv002_L2T_LSTE_12653_004_10SGD_20200928T224329_0700_01_LST.tif'
ST_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L3T_SM_PT-JPL_12653_004_10SGD_20200928T224329_0700_01/ECOv002_L3T_SM_PT-JPL_12653_004_10SGD_20200928T224329_0700_01_SM.tif'
SM_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L2T_STARS_12653_004_10SGD_20200928_0700_01/ECOv002_L2T_STARS_12653_004_10SGD_20200928_0700_01_NDVI.tif' NDVI_filename
Let’s use rioxarray
to open the surface temperature layer from the L2T_LSTE product on the 10SGD tile covering the Dangermond Preserve and take a first look at this image using hvplot
.
= rioxarray.open_rasterio(ET_filename, chunks='auto').squeeze('band', drop=True)
ET_image ET_image
The ECOSTRESS Collection 2 tiled products are gridded in local UTM, following Sentinel convention, and this gridding is sampled at 70 m.
= ET_image.rio.crs
crs print(f"CRS: {crs}")
= ET_image.rio.resolution()
cell_width, cell_height print(f"resolution: {cell_width} m")
= Point(np.nanmean(ET_image.x), np.nanmean(ET_image.y))
centroid_UTM = shapely.ops.transform(Transformer.from_crs(crs, "EPSG:4326", always_xy=True).transform, centroid_UTM)
centroid_latlon centroid_latlon.wkt
The date for this ECOSTRESS scene is September 28th, 2020. This overpass was around 2:45 in the afternoon.
= datetime.strptime(basename(ET_filename).split("_")[-4], "%Y%m%dT%H%M%S")
dt = dt + timedelta(hours=(np.radians(centroid_latlon.x) / np.pi * 12))
dt_solar print(f"date/time UTC: {dt:%Y-%m-%d %H:%M:%S}")
print(f"date/time solar: {dt_solar:%Y-%m-%d %H:%M:%S}")
To plot this image on top of a basemap, we’ll reproject it on the fly to match the basemap.
= [
ET_CMAP "#f6e8c3",
"#d8b365",
"#99974a",
"#53792d",
"#6bdfd2",
"#1839c5"
]
= ET_image.rio.reproject("EPSG:3857").hvplot.image(
ET_map =ET_CMAP,
cmap="ESRI",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(ET_image.quantile(0.02), ET_image.quantile(0.98)),
clim=basename(ET_filename)
title
)
ET_map
= rioxarray.open_rasterio(ST_filename).squeeze("band", drop=True) ST_image
= "jet"
ST_CMAP
= ST_image.attrs
attrs -= 273.15
ST_image = attrs ST_image.attrs
= ST_image.rio.reproject("EPSG:3857").hvplot.image(
ST_map =ST_CMAP,
cmap="ESRI",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(ST_image.quantile(0.02), ST_image.quantile(0.98)),
clim=basename(ST_filename)
title
)
ST_map
= rioxarray.open_rasterio(SM_filename).squeeze("band", drop=True) SM_image
= [
SM_CMAP "#f6e8c3",
"#d8b365",
"#99894a",
"#2d6779",
"#6bdfd2",
"#1839c5"
]
= SM_image.rio.reproject("EPSG:3857").hvplot.image(
SM_map =SM_CMAP,
cmap="ESRI",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(SM_image.quantile(0.02), SM_image.quantile(0.98)),
clim=basename(SM_filename)
title
)
SM_map
= rioxarray.open_rasterio(NDVI_filename).squeeze("band", drop=True) NDVI_image
= [
NDVI_CMAP "#0000ff",
"#000000",
"#745d1a",
"#e1dea2",
"#45ff01",
"#325e32"
]
= NDVI_image.rio.reproject("EPSG:3857").hvplot.image(
NDVI_map =NDVI_CMAP,
cmap="ESRI",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(NDVI_image.quantile(0.02), NDVI_image.quantile(0.98)),
clim=basename(NDVI_filename)
title
)
NDVI_map
Let’s open our ground observations provided by the Nature Conservancy. TNC recorded observations of tree health for each tree and the location of the tree in latitude and longitude. These are qualitative categories of good, poor, and dead.
= gpd.read_file("../data/TNC_fall_2020.geojson")
TNC_fall_2020 print(TNC_fall_2020.crs)
TNC_fall_2020.total_bounds
To compare our in situ point data with the projected raster data, we need to project these coordinates into the local UTM projection of the raster image.
= TNC_fall_2020.to_crs(ET_image.rio.crs)
TNC_fall_2020 print(TNC_fall_2020.crs)
TNC_fall_2020.head()
TNC_fall_2020.total_bounds
Let’s map out the in situ data. We’ll again reproject these points on the fly to match the basemap. Let’s assign colors to these categories as well, which we’ll use throughout the notebook.
# is there a way to get the point colors in a legend?
= {
tree_palette "dead": "black",
"poor": "red",
"other": "white",
"good": "green"
}
"EPSG:3857").hvplot.points(
TNC_fall_2020.to_crs(=TNC_fall_2020["health"].apply(lambda health: tree_palette[health]),
color="ESRI",
tiles=1.5,
size=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height="The Nature Conservancy Fall 2020 Tree Survey"
title )
In this projected space, let’s get the bounds of our study area in meters from the convex hull of our observation locations with a 100 meter buffer.
= TNC_fall_2020.unary_union.convex_hull.buffer(100).bounds
xmin, ymin, xmax, ymax xmin, ymin, xmax, ymax
buffer(100).total_bounds ### Similar to above... TNC_fall_2020.
Let’s look at the tree health points overlayed on top of ECOSTRESS evapotranspiration.
= ET_image.rio.clip([box(xmin, ymin, xmax, ymax)]) ET_subset
= ET_subset.rio.reproject("EPSG:3857").hvplot.image(
raster_map =ET_CMAP,
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(ET_subset.quantile(0.02), ET_subset.quantile(0.98))
clim
)
= TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
point_map =TNC_fall_2020["health"].apply(lambda health: tree_palette[health]),
color=1.5,
size=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
* point_map raster_map
Now let’s look at the tree health points on top of ECOSTRESS surface temperature.
= ST_image.rio.clip([box(xmin, ymin, xmax, ymax)]) ST_subset
= ST_subset.rio.reproject("EPSG:3857").hvplot.image(
raster_map =ST_CMAP,
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(ST_subset.quantile(0.02), ST_subset.quantile(0.98)),
clim="ECOSTRESS Surface Temperature and in situ Tree Health"
title
)
= TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
point_map =TNC_fall_2020["health"].apply(lambda health: tree_palette[health]),
color=1.5,
size=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
* point_map raster_map
And let’s look at tree health on top of soil moisture.
= SM_image.rio.clip([box(xmin, ymin, xmax, ymax)]) SM_subset
= SM_subset.rio.reproject("EPSG:3857").hvplot.image(
raster_map =SM_CMAP,
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(SM_subset.quantile(0.02), SM_subset.quantile(0.98)),
clim="ECOSTRESS Soil Moisture and in situ Tree Health"
title
)
= TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
point_map =TNC_fall_2020["health"].apply(lambda health: tree_palette[health]),
color=1.5,
size=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
* point_map raster_map
And here’s tree health on top of vegetation index.
= NDVI_image.rio.clip([box(xmin, ymin, xmax, ymax)]) NDVI_subset
= NDVI_subset.rio.reproject("EPSG:3857").hvplot.image(
raster_map =NDVI_CMAP,
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height=(NDVI_subset.quantile(0.02), NDVI_subset.quantile(0.98)),
clim="ECOSTRESS Vegetation Index and in situ Tree Health"
title
)
= TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
point_map =TNC_fall_2020["health"].apply(lambda health: tree_palette[health]),
color=1.5,
size=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
* point_map raster_map
To match these datasets together, let’s interpolate the ECOSTRESS rasters for evapotranspiration, surface temperature, soil moisture, and vegetation index to the Nature Conservency tree locations and make these sampled remote sensing data new columns in our table of tree observations.
"ET"] = rasterstats.point_query(
TNC_fall_2020[=TNC_fall_2020.geometry,
vectors=np.array(ET_subset),
raster=np.nan,
nodata=ET_subset.rio.transform()
affine
)
"ST"] = rasterstats.point_query(
TNC_fall_2020[=TNC_fall_2020.geometry,
vectors=np.array(ST_subset),
raster=np.nan,
nodata=ST_subset.rio.transform()
affine
)
"SM"] = rasterstats.point_query(
TNC_fall_2020[=TNC_fall_2020.geometry,
vectors=np.array(SM_subset),
raster=np.nan,
nodata=SM_subset.rio.transform()
affine
)
"NDVI"] = rasterstats.point_query(
TNC_fall_2020[=TNC_fall_2020.geometry,
vectors=np.array(NDVI_subset),
raster=np.nan,
nodata=NDVI_subset.rio.transform()
affine )
= TNC_fall_2020[["health", "ET", "ST", "SM", "NDVI", "geometry"]]
TNC_fall_2020 TNC_fall_2020.head()
Let’s plot the distributions of ECOSTRESS evapotranspiration for each of the tree health categories. The dead tree points match to a lower distribution of interpolated ECOSTRESS evapotranspiration estimates.
= plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
fig = sns.boxplot(
ax =TNC_fall_2020.health,
x=TNC_fall_2020.ET,
y=tree_palette
palette
)set(ylabel="ECOSTRESS Evapotranspiration (mm/day)", xlabel="In Situ Tree Health")
ax."Distribution of ECOSTRESS Evapotranspiration by Tree Health")
plt.title(
plt.show() plt.close(fig)
Let’s look at the distribution of ECOSTRESS surface temperature according to in situ tree health. The dead trees appear to have higher temperatures than other categories.
= plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
fig = sns.boxplot(
ax =TNC_fall_2020.health,
x=TNC_fall_2020.ST,
y=tree_palette
palette
)set(ylabel="ECOSTRESS Surface Temperature (Celsius)", xlabel="In Situ Tree Health")
ax."Distribution of ECOSTRESS Surface Temperature by Tree Health")
plt.title(
plt.show() plt.close(fig)
Looking at this again with ECOSTRESS soil moisture, the dead trees appear to be associated with lower soil moisture estimates.
= plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
fig = sns.boxplot(
ax =TNC_fall_2020.health,
x=TNC_fall_2020.SM,
y=tree_palette
palette
)set(ylabel="ECOSTRESS Soil Moisture ($m^3/m^3$)", xlabel="In Situ Tree Health")
ax."Distribution of ECOSTRESS Soil Moisture by Tree Health")
plt.title(
plt.show() plt.close(fig)
And the range of ECOSTRESS vegetation index at the dead trees appears to be lower than other categories.
= plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
fig = sns.boxplot(
ax =TNC_fall_2020.health,
x=TNC_fall_2020.NDVI,
y=tree_palette
palette
)set(ylabel="ECOSTRESS Normalized Difference Vegetation Index", xlabel="In Situ Tree Health")
ax."Distribution of ECOSTRESS Vegetation Index by Tree Health")
plt.title(
plt.show() plt.close(fig)
Let’s also approach this match-up the other way, by rasterizing the in situ point data. We’ll spatially bin counts of the tree health categories within each 70 m ECOSTRESS pixel and calculate the ratio of these category counts to the total count of observations in each pixel. Let’s start with rasterizing the total counts first.
= np.sort(ET_subset.x)
x_bins = np.append(x_bins, x_bins[-1] + (x_bins[-1] - x_bins[-2]))
x_bins = np.sort(ET_subset.y)
y_bins = np.insert(y_bins, 0, y_bins[0] + (y_bins[0] - y_bins[1]))
y_bins
= TNC_fall_2020.geometry.apply(lambda point: point.x)
x = TNC_fall_2020.geometry.apply(lambda point: point.y)
y
= np.histogram2d(
counts, _, _, =x,
x=y,
y=(x_bins, y_bins)
bins
)
= np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)
counts
= xarray.core.dataarray.DataArray(
counts_image =counts,
data=ET_subset.coords,
coords=ET_subset.dims,
dims=ET_subset.attrs
attrs
)
= counts_image.rio.reproject("EPSG:3857").hvplot.image(
counts_map ="jet",
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX,
height="Total Count of Tree Observations within Each 70 m ECOSTRESS Pixel"
title )
Now we’ll count the number of dead trees in each pixel and divide that by the total count to get a proportion of dead trees in each pixel. This proportion of dead trees appears to range from 5% to 25% across the rasterized image.
= TNC_fall_2020[TNC_fall_2020.health == "dead"]
dead_gdf = dead_gdf.geometry.apply(lambda point: point.x)
x = dead_gdf.geometry.apply(lambda point: point.y)
y
= np.histogram2d(
counts, _, _, =x,
x=y,
y=(x_bins, y_bins)
bins
)
= np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)
counts
= xarray.core.dataarray.DataArray(
dead_counts_image =counts,
data=ET_subset.coords,
coords=ET_subset.dims,
dims=ET_subset.attrs
attrs
)
"EPSG:3857").hvplot.image(cmap="jet", tiles="OSM", alpha=0.7)
dead_counts_image.rio.reproject(= dead_counts_image / counts_image
dead_proportion_image = ET_subset.attrs
dead_proportion_image.attrs
= dead_proportion_image.rio.reproject("EPSG:3857").hvplot.image(
dead_proportion_map ="jet",
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
dead_proportion_map
We’ll do this same rasterization for good trees.
= TNC_fall_2020[TNC_fall_2020.health == "good"]
good_gdf = good_gdf.geometry.apply(lambda point: point.x)
x = good_gdf.geometry.apply(lambda point: point.y)
y
= np.histogram2d(
counts, _, _, =x,
x=y,
y=(x_bins, y_bins)
bins
)
= np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)
counts
= xarray.core.dataarray.DataArray(
good_counts_image =counts,
data=ET_subset.coords,
coords=ET_subset.dims,
dims=ET_subset.attrs
attrs
)
"EPSG:3857").hvplot.image(cmap="jet", tiles="OSM", alpha=0.7)
good_counts_image.rio.reproject(= good_counts_image / counts_image
good_proportion_image = ET_subset.attrs
good_proportion_image.attrs
= good_proportion_image.rio.reproject("EPSG:3857").hvplot.image(
good_proportion_map ="jet",
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
good_proportion_map
= TNC_fall_2020[TNC_fall_2020.health == "poor"]
poor_gdf = poor_gdf.geometry.apply(lambda point: point.x)
x = poor_gdf.geometry.apply(lambda point: point.y)
y
= np.histogram2d(
counts, _, _, =x,
x=y,
y=(x_bins, y_bins)
bins
)
= np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)
counts
= xarray.core.dataarray.DataArray(
poor_counts_image =counts,
data=ET_subset.coords,
coords=ET_subset.dims,
dims=ET_subset.attrs
attrs
)
"EPSG:3857").hvplot.image(cmap="jet", tiles="OSM", alpha=0.7)
poor_counts_image.rio.reproject(= poor_counts_image / counts_image
poor_proportion_image = ET_subset.attrs
poor_proportion_image.attrs
= poor_proportion_image.rio.reproject("EPSG:3857").hvplot.image(
poor_proportion_map ="jet",
cmap="OSM",
tiles=0.7,
alpha=FIG_WIDTH_PX,
width=FIG_HEIGHT_PX
height
)
poor_proportion_map
Now let’s bring it all together by collapsing each of these images into columns of a table so we can analyze these matching data points.
= pd.DataFrame({
proportion_table "dead": dead_proportion_image.values.ravel(),
"poor": poor_proportion_image.values.ravel(),
"good": good_proportion_image.values.ravel(),
"ET": ET_subset.values.ravel(),
"ST": ST_subset.values.ravel(),
"SM": SM_subset.values.ravel(),
"NDVI": NDVI_subset.values.ravel()
}).dropna()
= proportion_table[np.abs(zscore(proportion_table.ET)) < 2]
proportion_table proportion_table.head()
Let’s investigate the correlations between these variables with a correlogram. Focusing on dead trees, it seems that there are rather weak relationships between the dead tree observations and the evapotranspiration input variables, but there is an inverse correlation between the dead tree observations and the evapotranspiration estimate itself.
= plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
fig = sns.heatmap(proportion_table.corr(), annot=True)
ax "Correlogram of ECOSTRESS Products to Tree Health Proportions")
plt.title(
plt.show() plt.close(fig)
We can break down these correlation coefficients in to individual scatterplots to better see how each pair of variables line up together.
= plt.figure()
fig = sns.pairplot(proportion_table)
ax "Pair-Plot of ECOSTRESS Products to Tree Health Proportions")
plt.title(
plt.show() plt.close(fig)
Let’s focus on the strongest relationship involving dead trees and ECOSTRESS evapotranspiration. We can visualize this inverse correlation with a scatterplot and decreasing trendline.
= plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
fig = sns.regplot(x=proportion_table.dead, y=proportion_table.ET)
ax set(ylabel="ECOSTRESS Evapotranspiration (mm/day)", xlabel="Proportion of Dead Tree Observations")
ax."Scatterplot of ECOSTRESS Evapotranspiration to Proportion of Dead Tree Observations")
plt.title(
plt.show() plt.close(fig)