Reproducing Key Figures from Kay et al. (2015) Paper
Introduction
This Jupyter Notebook demonstrates how one might use the NCAR Community Earth System Model (CESM) Large Ensemble (LENS) data hosted on AWS S3 (doi:10.26024/wt24-5j82). The notebook shows how to reproduce Figures 2 and 4 from the Kay et al. (2015) paper describing the CESM LENS dataset (doi:10.1175/BAMS-D-13-00255.1).
This resource is intended to be helpful for people not familiar with elements of the Pangeo framework, including Jupyter Notebooks, Xarray, and the Zarr data format, or with the original paper, so it includes additional explanation.
Set up environment
```python
import warnings

warnings.filterwarnings("ignore")

import intake
import numpy as np
import pandas as pd
import xarray as xr
```
Create and Connect to Dask Distributed Cluster
```python
from dask.distributed import Client

# Create cluster
from dask_gateway import Gateway

gateway = Gateway()
cluster = gateway.new_cluster()
cluster.adapt(minimum=2, maximum=100)
# Connect to cluster
client = Client(cluster)
# Display cluster dashboard URL
cluster
```
☝️ Link to scheduler dashboard will appear above.
Load data into xarray from a catalog using intake-esm
```python
# Open collection description file
catalog_url = "https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json"
col = intake.open_esm_datastore(catalog_url)
col
```
```python
# Show the first few lines of the catalog
col.df.head(10)
```
```python
# Show expanded version of collection structure with details
import pprint

uniques = col.unique(
    columns=["component", "frequency", "experiment", "variable"]
)
pprint.pprint(uniques, compact=True, indent=4)
```
Extract data needed to construct Figure 2 of Kay et al. paper
Search the catalog to find the desired data: in this case, the reference height temperature of the atmosphere, at daily and monthly time resolution, for the Historical, 20th Century, and RCP8.5 (IPCC Representative Concentration Pathway 8.5) experiments.
```python
col_subset = col.search(
    frequency=["daily", "monthly"],
    component="atm",
    variable="TREFHT",
    experiment=["20C", "RCP85", "HIST"],
)
col_subset
```
```python
col_subset.df
```
```python
# Load catalog entries for subset into a dictionary of xarray datasets
dsets = col_subset.to_dataset_dict(
    zarr_kwargs={"consolidated": True}, storage_options={"anon": True}
)
print(f"\nDataset dictionary keys:\n {dsets.keys()}")
```
```python
# Define Xarray datasets corresponding to the three experiments
ds_HIST = dsets["atm.HIST.monthly"]
ds_20C = dsets["atm.20C.daily"]
ds_RCP85 = dsets["atm.RCP85.daily"]
```
```python
# Use Dask.Distributed utility function to display size of each dataset
from distributed.utils import format_bytes

print(
    f"Historical: {format_bytes(ds_HIST.nbytes)}\n"
    f"20th Century: {format_bytes(ds_20C.nbytes)}\n"
    f"RCP8.5: {format_bytes(ds_RCP85.nbytes)}"
)
```
```python
# Extract the Reference Height Temperature data variable
t_hist = ds_HIST["TREFHT"]
t_20c = ds_20C["TREFHT"]
t_rcp = ds_RCP85["TREFHT"]
t_20c
```
```python
# The global surface temperature anomaly was computed relative to the 1961-90
# base period in the Kay et al. paper, so extract that time slice
t_ref = t_20c.sel(time=slice("1961", "1990"))
```
Read grid cell areas
Cell size varies with latitude, so this must be accounted for when computing the global mean.
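Before loading the model's actual cell areas, here is a minimal sketch of the weighting with made-up temperatures and relative areas (illustrative values only, not LENS data): the area-weighted mean is simply sum(T × area) / sum(area).
```python
# Toy illustration of an area-weighted mean (made-up numbers, not LENS data).
# A naive average over grid cells would over-weight the small polar cells.
import numpy as np

temps = np.array([300.0, 290.0, 250.0])  # tropical, midlatitude, polar cells (K)
areas = np.array([2.0, 1.5, 0.5])        # relative cell areas, shrinking poleward

print(temps.mean())                         # naive mean: 280.0
print((temps * areas).sum() / areas.sum())  # area-weighted mean: 290.0
```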
```python
cat = col.search(frequency="static", component="atm", experiment=["20C"])
_, grid = cat.to_dataset_dict(
    aggregate=False, zarr_kwargs={"consolidated": True}
).popitem()
grid
```
```python
cell_area = grid.area.load()
total_area = cell_area.sum()
cell_area
```
Define weighted means
Note: resample(time="AS") performs annual resampling anchored at the start of each calendar year (recent versions of pandas and xarray use the equivalent alias "YS"). See the Pandas documentation for the full list of resampling frequency options.
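A small self-contained illustration of this behavior (synthetic two-year monthly series, not the LENS data):
```python
# resample(time="AS") returns one value per calendar year, labeled by Jan 1
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.arange(24.0),
    dims="time",
    coords={"time": pd.date_range("2000-01-01", periods=24, freq="MS")},
)
print(da.resample(time="AS").mean("time"))
# -> two values, 5.5 and 17.5, at 2000-01-01 and 2001-01-01
```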
```python
# Annual-mean, area-weighted global mean time series for each experiment.
# The reference value is additionally averaged over time and ensemble members.
t_ref_ts = (
    (t_ref.resample(time="AS").mean("time") * cell_area).sum(dim=("lat", "lon"))
    / total_area
).mean(dim=("time", "member_id"))

t_hist_ts = (
    (t_hist.resample(time="AS").mean("time") * cell_area).sum(dim=("lat", "lon"))
) / total_area

t_20c_ts = (
    (t_20c.resample(time="AS").mean("time") * cell_area).sum(dim=("lat", "lon"))
) / total_area

t_rcp_ts = (
    (t_rcp.resample(time="AS").mean("time") * cell_area).sum(dim=("lat", "lon"))
) / total_area
```
Read data and compute means
Note: Dask's "lazy execution" philosophy means that until this point we have not actually read the bulk of the data. These steps take a while to complete, so we include the Notebook "cell magic" directive %%time to display elapsed and CPU times after computation.
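For readers new to Dask, the short standalone sketch below (unrelated to the LENS data) shows the same behavior in miniature: arithmetic on a dask array only builds a task graph, and nothing is evaluated until compute() (or, for xarray objects, load()) is called.
```python
# Building the computation is cheap; numbers only materialize on compute()
import dask.array as da

x = da.ones((10000, 10000), chunks=(1000, 1000))
y = (x * 2).mean()  # instant: constructs a task graph, reads no data
print(y)            # displays a lazy dask array, not a number
print(y.compute())  # triggers the actual computation -> 2.0
```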
```python
%%time
t_ref_mean = t_ref_ts.load()
t_ref_mean
```
```python
%%time
t_hist_ts_df = t_hist_ts.to_series().T
t_hist_ts_df.head()
```
```python
%%time
t_20c_ts_df = t_20c_ts.to_series().unstack().T
t_20c_ts_df.head()
```
```python
%%time
t_rcp_ts_df = t_rcp_ts.to_series().unstack().T
t_rcp_ts_df.head()
```
Get observations for Figure 2 (HadCRUT4; Morice et al. 2012)
```python
# Observational time series data for comparison with ensemble average
obsDataURL = "https://www.esrl.noaa.gov/psd/thredds/dodsC/Datasets/cru/hadcrut4/air.mon.anom.median.nc"
```
```python
ds = xr.open_dataset(obsDataURL).load()
ds
```
```python
def weighted_temporal_mean(ds):
    """Compute yearly averages of monthly data, weighting by days in each month."""
    # Length of each month, from the time bounds
    time_bound_diff = ds.time_bnds.diff(dim="nbnds")[:, 0]
    # Normalize so the weights within each year sum to one
    wgts = time_bound_diff.groupby("time.year") / time_bound_diff.groupby(
        "time.year"
    ).sum(xr.ALL_DIMS)
    np.testing.assert_allclose(wgts.groupby("time.year").sum(xr.ALL_DIMS), 1.0)
    obs = ds["air"]
    # Mask of valid (non-missing) observations
    cond = obs.isnull()
    ones = xr.where(cond, 0.0, 1.0)
    # Weighted annual sums of the data and of the valid-data mask;
    # their ratio is the weighted mean over valid points only
    obs_sum = (obs * wgts).resample(time="AS").sum(dim="time")
    ones_out = (ones * wgts).resample(time="AS").sum(dim="time")
    obs_s = (obs_sum / ones_out).mean(("lat", "lon")).to_series()
    return obs_s
```
- Limit Observations to 20th Century
```python
obs_s = weighted_temporal_mean(ds)
obs_s = obs_s["1920":]
obs_s.head()
```
```python
# Anomalies of each ensemble member relative to the 1961-90 reference mean
all_ts_anom = pd.concat([t_20c_ts_df, t_rcp_ts_df]) - t_ref_mean.data
years = [val.year for val in all_ts_anom.index]
obs_years = [val.year for val in obs_s.index]
```
```python
# Combine ensemble member 1 data from historical and 20th century experiments;
# the last two (overlapping) years of the historical series are dropped so the
# concatenated index has no duplicates (verify_integrity=True enforces this)
hist_anom = t_hist_ts_df - t_ref_mean.data
member1 = pd.concat(
    [hist_anom.iloc[:-2], all_ts_anom.iloc[:, 0]], verify_integrity=True
)
member1_years = [val.year for val in member1.index]
```
Figure 2: Global surface temperature anomaly (1961-90 base period) for individual ensemble members, and observations
```python
import matplotlib.pyplot as plt
```
```python
ax = plt.axes()

ax.tick_params(
    right=True, top=True, direction="out", length=6, width=2, grid_alpha=0.5
)
ax.plot(years, all_ts_anom.iloc[:, 1:], color="grey")
ax.plot(obs_years, obs_s["1920":], color="red")
ax.plot(member1_years, member1, color="black")

ax.text(
    0.35,
    0.4,
    "observations",
    verticalalignment="bottom",
    horizontalalignment="left",
    transform=ax.transAxes,
    color="red",
    fontsize=10,
)
ax.text(
    0.35,
    0.33,
    "members 2-40",
    verticalalignment="bottom",
    horizontalalignment="left",
    transform=ax.transAxes,
    color="grey",
    fontsize=10,
)
ax.text(
    0.05,
    0.2,
    "member 1",
    verticalalignment="bottom",
    horizontalalignment="left",
    transform=ax.transAxes,
    color="black",
    fontsize=10,
)

ax.set_xticks([1850, 1920, 1950, 2000, 2050, 2100])
plt.ylim(-1, 5)
plt.xlim(1850, 2100)
plt.ylabel("Global Surface\nTemperature Anomaly (K)")
plt.show()
```
Figure will appear above when ready. Compare with Fig. 2 of Kay et al. 2015 (doi:10.1175/BAMS-D-13-00255.1).
Compute linear trend for winter seasons
```python
def linear_trend(da, dim="time"):
    da_chunk = da.chunk({dim: -1})
    trend = xr.apply_ufunc(
        calc_slope,
        da_chunk,
        vectorize=True,
        input_core_dims=[[dim]],
        output_core_dims=[[]],
        output_dtypes=[np.float64],  # np.float is removed in NumPy >= 1.24
        dask="parallelized",
    )
    return trend


def calc_slope(y):
    """ufunc to be used by linear_trend"""
    x = np.arange(len(y))

    # drop missing values (NaNs) from x and y
    finite_indexes = ~np.isnan(y)
    slope = (
        np.nan
        if (np.sum(finite_indexes) < 2)
        else np.polyfit(x[finite_indexes], y[finite_indexes], 1)[0]
    )
    return slope
```
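As a quick sanity check that calc_slope behaves as intended, one can apply it to a synthetic series with a known slope and a NaN gap (illustrative values only):
```python
# A line with slope 0.5 per time step, with one missing value, should still
# yield ~0.5; an all-NaN series should yield NaN.
y = 0.5 * np.arange(10.0)
y[3] = np.nan
print(calc_slope(y))                    # ~0.5
print(calc_slope(np.full(10, np.nan)))  # nan (fewer than 2 finite points)
```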
Compute ensemble trends
```python
t = xr.concat([t_20c, t_rcp], dim="time")
seasons = t.sel(time=slice("1979", "2012")).resample(time="QS-DEC").mean("time")
# Include only full seasons from 1979 through 2012
seasons = seasons.sel(time=slice("1979", "2012")).load()
```
```python
# Keep only the winter (DJF) seasons, which QS-DEC labels by their December
winter_seasons = seasons.sel(
    time=seasons.time.where(seasons.time.dt.month == 12, drop=True)
)
# Multiply the per-season slope by the number of seasons so the trend is
# expressed over the full period (K per 34 years)
winter_trends = linear_trend(
    winter_seasons.chunk({"lat": 20, "lon": 20, "time": -1})
).load() * len(winter_seasons.time)

# Compute ensemble mean from the first 30 members
winter_trends_mean = winter_trends.isel(member_id=range(30)).mean(
    dim="member_id"
)
```
```python
# Make sure that we have 34 seasons (winters of Dec 1979 through Dec 2012)
assert len(winter_seasons.time) == 34
```
Get Observations for Figure 4 (NASA GISS GISTEMP)
```python
# Observational time series data for comparison with ensemble average
# NASA GISS Surface Temperature Analysis, https://data.giss.nasa.gov/gistemp/
obsDataURL = (
    "https://data.giss.nasa.gov/pub/gistemp/gistemp1200_GHCNv4_ERSSTv5.nc.gz"
)
```
```python
# Download, unzip, and load file
import os

os.system("wget " + obsDataURL)

obsDataFileName = obsDataURL.split("/")[-1]
os.system("gunzip " + obsDataFileName)

obsDataFileName = obsDataFileName[:-3]
ds = xr.open_dataset(obsDataFileName).load()
ds
```
```python
# Remap longitude range from [-180, 180] to [0, 360] for plotting purposes
ds = ds.assign_coords(lon=((ds.lon + 360) % 360)).sortby("lon")
ds
```
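The modulo arithmetic shifts western (negative) longitudes into the 180-360 range while leaving eastern longitudes unchanged; a small worked example with sample values:
```python
# Worked example of the longitude remapping (sample values for illustration)
lon_in = np.array([-180.0, -75.0, 0.0, 90.0, 179.0])
print((lon_in + 360) % 360)  # [180. 285.   0.  90. 179.]
```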
Compute observed trends
```python
obs_seasons = (
    ds.sel(time=slice("1979", "2012")).resample(time="QS-DEC").mean("time")
)
# Include only full seasons from 1979 through 2012
obs_seasons = obs_seasons.sel(time=slice("1979", "2012")).load()

# Select the observed winter (DJF) seasons
obs_winter_seasons = obs_seasons.sel(
    time=obs_seasons.time.where(obs_seasons.time.dt.month == 12, drop=True)
)
obs_winter_seasons
```
```python
# Express the observed trend over the full period, as for the model output
obs_winter_trends = linear_trend(
    obs_winter_seasons.chunk({"lat": 20, "lon": 20, "time": -1})
).load() * len(obs_winter_seasons.time)
obs_winter_trends
```
Figure 4: Global maps of historical (1979-2012) boreal winter (DJF) surface air temperature trends
```python
import cartopy.crs as ccrs
import cmaps  # for NCL colormaps
import matplotlib.pyplot as plt

contour_levels = [
    -6, -5, -4, -3, -2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2, 3, 4, 5, 6,
]
color_map = cmaps.ncl_default
```
```python
def make_map_plot(nplot_rows, nplot_cols, plot_index, data, plot_label):
    """Create a single map subplot."""
    ax = plt.subplot(
        nplot_rows,
        nplot_cols,
        plot_index,
        projection=ccrs.Robinson(central_longitude=180),
    )
    cplot = plt.contourf(
        lons,
        lats,
        data,
        levels=contour_levels,
        cmap=color_map,
        extend="both",
        transform=ccrs.PlateCarree(),
    )
    ax.coastlines(color="grey")
    # Note: the keyword is "fontsize"; "fontSize" raises an error in Matplotlib
    ax.text(0.01, 0.01, plot_label, fontsize=14, transform=ax.transAxes)
    return cplot, ax
```
```python
# Generate plot (may take a while as many individual maps are generated)
numPlotRows = 8
numPlotCols = 4
figWidth = 20
figHeight = 30

fig, axs = plt.subplots(numPlotRows, numPlotCols, figsize=(figWidth, figHeight))

lats = winter_trends.lat
lons = winter_trends.lon

# Create ensemble member plots
for ensemble_index in range(30):
    plot_data = winter_trends.isel(member_id=ensemble_index)
    plot_index = ensemble_index + 1
    plot_label = str(plot_index)
    plotRow = ensemble_index // numPlotCols
    plotCol = ensemble_index % numPlotCols
    # Retain axes objects for figure colorbar
    cplot, axs[plotRow, plotCol] = make_map_plot(
        numPlotRows, numPlotCols, plot_index, plot_data, plot_label
    )

# Create plots for the ensemble mean, observations, and a figure color bar
cplot, axs[7, 2] = make_map_plot(
    numPlotRows, numPlotCols, 31, winter_trends_mean, "EM"
)

lats = obs_winter_trends.lat
lons = obs_winter_trends.lon
cplot, axs[7, 3] = make_map_plot(
    numPlotRows, numPlotCols, 32, obs_winter_trends.tempanomaly, "OBS"
)

cbar = fig.colorbar(
    cplot, ax=axs, orientation="horizontal", shrink=0.7, pad=0.02
)
cbar.ax.set_title(
    "1979-2012 DJF surface air temperature trends (K/34 years)", fontsize=16
)
cbar.set_ticks(contour_levels)
cbar.set_ticklabels(contour_levels)
```
Figure will appear above when ready. Compare with Fig. 4 of Kay et al. 2015 (doi:10.1175/BAMS-D-13-00255.1).
```python
# Gracefully destroy/close our cluster
client.close()
cluster.close()
```