2022 ECOSTRESS Cloud Workshop – comparing_tnc_tree_data_to

import xarray as xr
import rioxarray
import rasterio as rio
import os
import holoviews as hv
import hvplot.xarray
import hvplot.pandas
import geopandas as gpd
import shapely
from shapely.geometry import box
from shapely.geometry import Point
from pyproj import Transformer
from matplotlib.colors import ListedColormap
#import rasterstats
import numpy as np
import seaborn as sns
from datetime import datetime
from datetime import timedelta
from os.path import basename
import xarray
import pandas as pd
from scipy.stats import zscore
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt

!mamba install rasterstats -q -y

import rasterstats

FIG_WIDTH_PX = 1080
FIG_HEIGHT_PX = 720
FIG_WIDTH_IN = 16
FIG_HEIGHT_IN = 9

ET_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L3T_ET_PT-JPL_12653_004_10SGD_20200928T224329_0700_01/ECOv002_L3T_ET_PT-JPL_12653_004_10SGD_20200928T224329_0700_01_ETdaily.tif'
ST_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L2T_LSTE_12653_004_10SGD_20200928T224329_0700_01/ECOv002_L2T_LSTE_12653_004_10SGD_20200928T224329_0700_01_LST.tif'
SM_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L3T_SM_PT-JPL_12653_004_10SGD_20200928T224329_0700_01/ECOv002_L3T_SM_PT-JPL_12653_004_10SGD_20200928T224329_0700_01_SM.tif'
NDVI_filename = '/home/jovyan/shared/2022-ecostress-workshop/ECOv002_L2T_STARS_12653_004_10SGD_20200928_0700_01/ECOv002_L2T_STARS_12653_004_10SGD_20200928_0700_01_NDVI.tif'

Let’s use rioxarray to open the surface temperature layer from the L2T_LSTE product on the 10SGD tile covering the Dangermond Preserve and take a first look at this image using hvplot.

ET_image = rioxarray.open_rasterio(ET_filename, chunks='auto').squeeze('band', drop=True)
ET_image

The ECOSTRESS Collection 2 tiled products are gridded in local UTM, following Sentinel convention, and this gridding is sampled at 70 m.

crs = ET_image.rio.crs
print(f"CRS: {crs}")
cell_width, cell_height = ET_image.rio.resolution()
print(f"resolution: {cell_width} m")

centroid_UTM = Point(np.nanmean(ET_image.x), np.nanmean(ET_image.y))
centroid_latlon = shapely.ops.transform(Transformer.from_crs(crs, "EPSG:4326", always_xy=True).transform, centroid_UTM)
centroid_latlon.wkt

The date for this ECOSTRESS scene is September 28th, 2020. This overpass was around 2:45 in the afternoon.

dt = datetime.strptime(basename(ET_filename).split("_")[-4], "%Y%m%dT%H%M%S")
dt_solar = dt + timedelta(hours=(np.radians(centroid_latlon.x) / np.pi * 12))
print(f"date/time UTC: {dt:%Y-%m-%d %H:%M:%S}")
print(f"date/time solar: {dt_solar:%Y-%m-%d %H:%M:%S}")

To plot this image on top of a basemap, we’ll reproject it on the fly to match the basemap.

ET_CMAP = [
    "#f6e8c3",
    "#d8b365",
    "#99974a",
    "#53792d",
    "#6bdfd2",
    "#1839c5"
]

ET_map = ET_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap=ET_CMAP, 
    tiles="ESRI", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(ET_image.quantile(0.02), ET_image.quantile(0.98)),
    title=basename(ET_filename)
)

ET_map

ST_image = rioxarray.open_rasterio(ST_filename).squeeze("band", drop=True)

ST_CMAP = "jet"

attrs = ST_image.attrs
ST_image -= 273.15
ST_image.attrs = attrs

ST_map = ST_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap=ST_CMAP, 
    tiles="ESRI", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(ST_image.quantile(0.02), ST_image.quantile(0.98)),
    title=basename(ST_filename)
)

ST_map

SM_image = rioxarray.open_rasterio(SM_filename).squeeze("band", drop=True)

SM_CMAP = [
    "#f6e8c3",
    "#d8b365",
    "#99894a",
    "#2d6779",
    "#6bdfd2",
    "#1839c5"
]

SM_map = SM_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap=SM_CMAP, 
    tiles="ESRI", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(SM_image.quantile(0.02), SM_image.quantile(0.98)),
    title=basename(SM_filename)
)

SM_map

NDVI_image = rioxarray.open_rasterio(NDVI_filename).squeeze("band", drop=True)

NDVI_CMAP = [
    "#0000ff",
    "#000000",
    "#745d1a",
    "#e1dea2",
    "#45ff01",
    "#325e32"
]

NDVI_map = NDVI_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap=NDVI_CMAP, 
    tiles="ESRI", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(NDVI_image.quantile(0.02), NDVI_image.quantile(0.98)),
    title=basename(NDVI_filename)
)

NDVI_map

Let’s open our ground observations provided by the Nature Conservancy. TNC recorded observations of tree health for each tree and the location of the tree in latitude and longitude. These are qualitative categories of good, poor, and dead.

TNC_fall_2020 = gpd.read_file("../data/TNC_fall_2020.geojson")
print(TNC_fall_2020.crs)
TNC_fall_2020.total_bounds

To compare our in situ point data with the projected raster data, we need to project these coordinates into the local UTM projection of the raster image.

TNC_fall_2020 = TNC_fall_2020.to_crs(ET_image.rio.crs)
print(TNC_fall_2020.crs)

TNC_fall_2020.head()

TNC_fall_2020.total_bounds

Let’s map out the in situ data. We’ll again reproject these points on the fly to match the basemap. Let’s assign colors to these categories as well, which we’ll use throughout the notebook.

# is there a way to get the point colors in a legend?
tree_palette = {
    "dead": "black",
    "poor": "red",
    "other": "white",
    "good": "green"
}

TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
    color=TNC_fall_2020["health"].apply(lambda health: tree_palette[health]), 
    tiles="ESRI", 
    size=1.5, 
    width=FIG_WIDTH_PX, 
    height=FIG_HEIGHT_PX,
    title="The Nature Conservancy Fall 2020 Tree Survey"
)

In this projected space, let’s get the bounds of our study area in meters from the convex hull of our observation locations with a 100 meter buffer.

xmin, ymin, xmax, ymax = TNC_fall_2020.unary_union.convex_hull.buffer(100).bounds
xmin, ymin, xmax, ymax

TNC_fall_2020.buffer(100).total_bounds ### Similar to above...

Let’s look at the tree health points overlayed on top of ECOSTRESS evapotranspiration.

ET_subset = ET_image.rio.clip([box(xmin, ymin, xmax, ymax)])

raster_map = ET_subset.rio.reproject("EPSG:3857").hvplot.image(
    cmap=ET_CMAP, 
    tiles="OSM", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(ET_subset.quantile(0.02), ET_subset.quantile(0.98))
)

point_map = TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
    color=TNC_fall_2020["health"].apply(lambda health: tree_palette[health]), 
    size=1.5, 
    alpha=0.7,
    width=FIG_WIDTH_PX, 
    height=FIG_HEIGHT_PX
)

raster_map * point_map

Now let’s look at the tree health points on top of ECOSTRESS surface temperature.

ST_subset = ST_image.rio.clip([box(xmin, ymin, xmax, ymax)])

raster_map = ST_subset.rio.reproject("EPSG:3857").hvplot.image(
    cmap=ST_CMAP, 
    tiles="OSM", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(ST_subset.quantile(0.02), ST_subset.quantile(0.98)),
    title="ECOSTRESS Surface Temperature and in situ Tree Health"
)

point_map = TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
    color=TNC_fall_2020["health"].apply(lambda health: tree_palette[health]), 
    size=1.5, 
    alpha=0.7,
    width=FIG_WIDTH_PX, 
    height=FIG_HEIGHT_PX
)

raster_map * point_map

And let’s look at tree health on top of soil moisture.

SM_subset = SM_image.rio.clip([box(xmin, ymin, xmax, ymax)])

raster_map = SM_subset.rio.reproject("EPSG:3857").hvplot.image(
    cmap=SM_CMAP, 
    tiles="OSM", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(SM_subset.quantile(0.02), SM_subset.quantile(0.98)),
    title="ECOSTRESS Soil Moisture and in situ Tree Health"
)

point_map = TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
    color=TNC_fall_2020["health"].apply(lambda health: tree_palette[health]), 
    size=1.5, 
    alpha=0.7,
    width=FIG_WIDTH_PX, 
    height=FIG_HEIGHT_PX
)

raster_map * point_map

And here’s tree health on top of vegetation index.

NDVI_subset = NDVI_image.rio.clip([box(xmin, ymin, xmax, ymax)])

raster_map = NDVI_subset.rio.reproject("EPSG:3857").hvplot.image(
    cmap=NDVI_CMAP, 
    tiles="OSM", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    clim=(NDVI_subset.quantile(0.02), NDVI_subset.quantile(0.98)),
    title="ECOSTRESS Vegetation Index and in situ Tree Health"
)

point_map = TNC_fall_2020.to_crs("EPSG:3857").hvplot.points(
    color=TNC_fall_2020["health"].apply(lambda health: tree_palette[health]), 
    size=1.5, 
    alpha=0.7,
    width=FIG_WIDTH_PX, 
    height=FIG_HEIGHT_PX
)

raster_map * point_map

To match these datasets together, let’s interpolate the ECOSTRESS rasters for evapotranspiration, surface temperature, soil moisture, and vegetation index to the Nature Conservency tree locations and make these sampled remote sensing data new columns in our table of tree observations.

TNC_fall_2020["ET"] = rasterstats.point_query(
    vectors=TNC_fall_2020.geometry,
    raster=np.array(ET_subset),
    nodata=np.nan,
    affine=ET_subset.rio.transform()
)

TNC_fall_2020["ST"] = rasterstats.point_query(
    vectors=TNC_fall_2020.geometry,
    raster=np.array(ST_subset),
    nodata=np.nan,
    affine=ST_subset.rio.transform()
)

TNC_fall_2020["SM"] = rasterstats.point_query(
    vectors=TNC_fall_2020.geometry,
    raster=np.array(SM_subset),
    nodata=np.nan,
    affine=SM_subset.rio.transform()
)

TNC_fall_2020["NDVI"] = rasterstats.point_query(
    vectors=TNC_fall_2020.geometry,
    raster=np.array(NDVI_subset),
    nodata=np.nan,
    affine=NDVI_subset.rio.transform()
)

TNC_fall_2020 = TNC_fall_2020[["health", "ET", "ST", "SM", "NDVI", "geometry"]]
TNC_fall_2020.head()

Let’s plot the distributions of ECOSTRESS evapotranspiration for each of the tree health categories. The dead tree points match to a lower distribution of interpolated ECOSTRESS evapotranspiration estimates.

fig = plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
ax = sns.boxplot(
    x=TNC_fall_2020.health, 
    y=TNC_fall_2020.ET,
    palette=tree_palette
)
ax.set(ylabel="ECOSTRESS Evapotranspiration (mm/day)", xlabel="In Situ Tree Health")
plt.title("Distribution of ECOSTRESS Evapotranspiration by Tree Health")
plt.show()
plt.close(fig)

Let’s look at the distribution of ECOSTRESS surface temperature according to in situ tree health. The dead trees appear to have higher temperatures than other categories.

fig = plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
ax = sns.boxplot(
    x=TNC_fall_2020.health, 
    y=TNC_fall_2020.ST,
    palette=tree_palette
)
ax.set(ylabel="ECOSTRESS Surface Temperature (Celsius)", xlabel="In Situ Tree Health")
plt.title("Distribution of ECOSTRESS Surface Temperature by Tree Health")
plt.show()
plt.close(fig)

Looking at this again with ECOSTRESS soil moisture, the dead trees appear to be associated with lower soil moisture estimates.

fig = plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
ax = sns.boxplot(
    x=TNC_fall_2020.health, 
    y=TNC_fall_2020.SM,
    palette=tree_palette
)
ax.set(ylabel="ECOSTRESS Soil Moisture ($m^3/m^3$)", xlabel="In Situ Tree Health")
plt.title("Distribution of ECOSTRESS Soil Moisture by Tree Health")
plt.show()
plt.close(fig)

And the range of ECOSTRESS vegetation index at the dead trees appears to be lower than other categories.

fig = plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
ax = sns.boxplot(
    x=TNC_fall_2020.health, 
    y=TNC_fall_2020.NDVI,
    palette=tree_palette
)
ax.set(ylabel="ECOSTRESS Normalized Difference Vegetation Index", xlabel="In Situ Tree Health")
plt.title("Distribution of ECOSTRESS Vegetation Index by Tree Health")
plt.show()
plt.close(fig)

Let’s also approach this match-up the other way, by rasterizing the in situ point data. We’ll spatially bin counts of the tree health categories within each 70 m ECOSTRESS pixel and calculate the ratio of these category counts to the total count of observations in each pixel. Let’s start with rasterizing the total counts first.

x_bins = np.sort(ET_subset.x)
x_bins = np.append(x_bins, x_bins[-1] + (x_bins[-1] - x_bins[-2]))
y_bins = np.sort(ET_subset.y)
y_bins = np.insert(y_bins, 0, y_bins[0] + (y_bins[0] - y_bins[1]))

x = TNC_fall_2020.geometry.apply(lambda point: point.x)
y = TNC_fall_2020.geometry.apply(lambda point: point.y)

counts, _, _, = np.histogram2d(
    x=x,
    y=y,
    bins=(x_bins, y_bins)
)

counts = np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)

counts_image = xarray.core.dataarray.DataArray(
    data=counts,
    coords=ET_subset.coords,
    dims=ET_subset.dims,
    attrs=ET_subset.attrs
)

counts_map = counts_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap="jet",
    tiles="OSM",
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX,
    title="Total Count of Tree Observations within Each 70 m ECOSTRESS Pixel"
)

Now we’ll count the number of dead trees in each pixel and divide that by the total count to get a proportion of dead trees in each pixel. This proportion of dead trees appears to range from 5% to 25% across the rasterized image.

dead_gdf = TNC_fall_2020[TNC_fall_2020.health == "dead"]
x = dead_gdf.geometry.apply(lambda point: point.x)
y = dead_gdf.geometry.apply(lambda point: point.y)

counts, _, _, = np.histogram2d(
    x=x,
    y=y,
    bins=(x_bins, y_bins)
)

counts = np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)

dead_counts_image = xarray.core.dataarray.DataArray(
    data=counts,
    coords=ET_subset.coords,
    dims=ET_subset.dims,
    attrs=ET_subset.attrs
)

dead_counts_image.rio.reproject("EPSG:3857").hvplot.image(cmap="jet", tiles="OSM", alpha=0.7)
dead_proportion_image = dead_counts_image / counts_image
dead_proportion_image.attrs = ET_subset.attrs

dead_proportion_map = dead_proportion_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap="jet",
    tiles="OSM",
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX
)

dead_proportion_map

We’ll do this same rasterization for good trees.

good_gdf = TNC_fall_2020[TNC_fall_2020.health == "good"]
x = good_gdf.geometry.apply(lambda point: point.x)
y = good_gdf.geometry.apply(lambda point: point.y)

counts, _, _, = np.histogram2d(
    x=x,
    y=y,
    bins=(x_bins, y_bins)
)

counts = np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)

good_counts_image = xarray.core.dataarray.DataArray(
    data=counts,
    coords=ET_subset.coords,
    dims=ET_subset.dims,
    attrs=ET_subset.attrs
)

good_counts_image.rio.reproject("EPSG:3857").hvplot.image(cmap="jet", tiles="OSM", alpha=0.7)
good_proportion_image = good_counts_image / counts_image
good_proportion_image.attrs = ET_subset.attrs

good_proportion_map = good_proportion_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap="jet", 
    tiles="OSM", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX
)

good_proportion_map

poor_gdf = TNC_fall_2020[TNC_fall_2020.health == "poor"]
x = poor_gdf.geometry.apply(lambda point: point.x)
y = poor_gdf.geometry.apply(lambda point: point.y)

counts, _, _, = np.histogram2d(
    x=x,
    y=y,
    bins=(x_bins, y_bins)
)

counts = np.rot90(counts)
counts = np.where(counts <= 0, np.nan, counts)

poor_counts_image = xarray.core.dataarray.DataArray(
    data=counts,
    coords=ET_subset.coords,
    dims=ET_subset.dims,
    attrs=ET_subset.attrs
)

poor_counts_image.rio.reproject("EPSG:3857").hvplot.image(cmap="jet", tiles="OSM", alpha=0.7)
poor_proportion_image = poor_counts_image / counts_image
poor_proportion_image.attrs = ET_subset.attrs

poor_proportion_map = poor_proportion_image.rio.reproject("EPSG:3857").hvplot.image(
    cmap="jet", 
    tiles="OSM", 
    alpha=0.7,
    width=FIG_WIDTH_PX,
    height=FIG_HEIGHT_PX
)

poor_proportion_map

Now let’s bring it all together by collapsing each of these images into columns of a table so we can analyze these matching data points.

proportion_table = pd.DataFrame({
    "dead": dead_proportion_image.values.ravel(), 
    "poor": poor_proportion_image.values.ravel(), 
    "good": good_proportion_image.values.ravel(), 
    "ET": ET_subset.values.ravel(),
    "ST": ST_subset.values.ravel(),
    "SM": SM_subset.values.ravel(),
    "NDVI": NDVI_subset.values.ravel()
}).dropna()

proportion_table = proportion_table[np.abs(zscore(proportion_table.ET)) < 2]
proportion_table.head()

Let’s investigate the correlations between these variables with a correlogram. Focusing on dead trees, it seems that there are rather weak relationships between the dead tree observations and the evapotranspiration input variables, but there is an inverse correlation between the dead tree observations and the evapotranspiration estimate itself.

fig = plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
ax = sns.heatmap(proportion_table.corr(), annot=True)
plt.title("Correlogram of ECOSTRESS Products to Tree Health Proportions")
plt.show()
plt.close(fig)

We can break down these correlation coefficients in to individual scatterplots to better see how each pair of variables line up together.

fig = plt.figure()
ax = sns.pairplot(proportion_table)
plt.title("Pair-Plot of ECOSTRESS Products to Tree Health Proportions")
plt.show()
plt.close(fig)

Let’s focus on the strongest relationship involving dead trees and ECOSTRESS evapotranspiration. We can visualize this inverse correlation with a scatterplot and decreasing trendline.

fig = plt.figure(figsize=(FIG_WIDTH_IN, FIG_HEIGHT_IN))
ax = sns.regplot(x=proportion_table.dead, y=proportion_table.ET)
ax.set(ylabel="ECOSTRESS Evapotranspiration (mm/day)", xlabel="Proportion of Dead Tree Observations")
plt.title("Scatterplot of ECOSTRESS Evapotranspiration to Proportion of Dead Tree Observations")
plt.show()
plt.close(fig)