Ice Extent over years¶

1979 to 2023¶

The files used provide the total sea ice extent for each day over the entire time period, for both hemispheres. The data was obtained from the National Snow and Ice Data Center (NSIDC), a part of CIRES at the University of Colorado Boulder. We are interested in the columns: "Year", "Month", "Day", "Extent". The column "Extent" has values in million square kilometers.

We intend to observe the change in sea ice extent over the given time period. We removed the data for 1978 because there were only two months of data, which is not representative of the whole year; the last year in the files (2024) is dropped as well because it is incomplete.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import watermark
import requests
import pathlib
import zipfile
sns.set_theme()

Arctic¶

# Fetch the Arctic daily sea ice extent CSV unless a local copy already exists.
# Create the Datasets directory first so both the existence check and the
# write below work on a fresh checkout.
path = pathlib.Path("Datasets")
path.mkdir(exist_ok=True)

n_csv = path / "N_seaice_extent_daily_v3.0.csv"
if n_csv.is_file():
    print("The data is available")
else:
    url = "https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/data/N_seaice_extent_daily_v3.0.csv"
    # Use a timeout so a stalled connection cannot hang the notebook, and
    # raise on HTTP errors so an error page is never saved under the CSV's
    # name (it would be treated as "available" on every later run).
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    n_csv.write_bytes(r.content)

# Load the Arctic CSV (header on the first line) and preview the first ten rows
n_seaice = pd.read_csv(pathlib.Path("Datasets") / "N_seaice_extent_daily_v3.0.csv", header=0)
n_seaice.head(10)

	Year	Month	Day	Extent	Missing	Source Data
0	YYYY	MM	DD	10^6 sq km	10^6 sq km	Source data product web sites: http://nsidc.o...
1	1978	10	26	10.231	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.10.26/nt_19...
2	1978	10	28	10.420	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.10.28/nt_19...
3	1978	10	30	10.557	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.10.30/nt_19...
4	1978	11	01	10.670	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.01/nt_19...
5	1978	11	03	10.777	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.03/nt_19...
6	1978	11	05	10.968	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.05/nt_19...
7	1978	11	07	11.080	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.07/nt_19...
8	1978	11	09	11.189	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.09/nt_19...
9	1978	11	11	11.314	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.11/nt_19...

# Inspect the raw column names (the output shows that several carry leading spaces)
n_seaice.columns

Index(['Year', ' Month', ' Day', '     Extent', '    Missing', ' Source Data'], dtype='object')

# Remove the ' Source Data' column and row 0, which holds the units line
# (YYYY, MM, DD, ...) rather than a measurement.
n_seaice_clean = n_seaice.drop(columns=[' Source Data']).drop(index=[0])
n_seaice_clean.head()

	Year	Month	Day	Extent
1	1978	10	26	10.231
2	1978	10	28	10.420
3	1978	10	30	10.557
4	1978	11	01	10.670
5	1978	11	03	10.777

# Explore the data shape (rows, columns)
n_seaice_clean.shape

(14905, 5)

# Basic information about the data set: column dtypes and non-null counts
n_seaice_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14905 entries, 1 to 14905
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Year         14905 non-null  object
 1    Month       14905 non-null  object
 2    Day         14905 non-null  object
 3        Extent  14905 non-null  object
 4       Missing  14905 non-null  object
dtypes: object(5)
memory usage: 582.4+ KB

# Strip the padding from the column names, then cast Year/Month/Day to
# integers and Extent/Missing to floats. Renaming in place (instead of adding
# new columns and dropping the padded ones) keeps the column order and lets
# the cell be re-run without a KeyError.
n_seaice_clean = (
    n_seaice_clean
    .rename(columns=str.strip)
    .astype({'Year': int, 'Month': int, 'Day': int, 'Extent': float, 'Missing': float})
)
# Combine the three date parts into a single datetime column
n_seaice_clean['Date'] = pd.to_datetime(n_seaice_clean[['Year', 'Month', 'Day']])
n_seaice_clean.head()

	Year	Month	Day	Extent	Date
1	1978	10	26	10.231	1978-10-26
2	1978	10	28	10.420	1978-10-28
3	1978	10	30	10.557	1978-10-30
4	1978	11	1	10.670	1978-11-01
5	1978	11	3	10.777	1978-11-03

# Basic information about the data after converting the columns to numeric
# types, dropping the whitespace-padded originals and adding the Date column
n_seaice_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14905 entries, 1 to 14905
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Year     14905 non-null  int64         
 1   Month    14905 non-null  int64         
 2   Day      14905 non-null  int64         
 3   Extent   14905 non-null  float64       
 4   Missing  14905 non-null  float64       
 5   Date     14905 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(3)
memory usage: 698.8 KB

# Display the cleaned Arctic table
n_seaice_clean

	Year	Month	Day	Extent	Missing	Date
1	1978	10	26	10.231	0.0	1978-10-26
2	1978	10	28	10.420	0.0	1978-10-28
3	1978	10	30	10.557	0.0	1978-10-30
4	1978	11	1	10.670	0.0	1978-11-01
5	1978	11	3	10.777	0.0	1978-11-03
...	...	...	...	...	...	...
14901	2024	2	18	14.676	0.0	2024-02-18
14902	2024	2	19	14.776	0.0	2024-02-19
14903	2024	2	20	14.846	0.0	2024-02-20
14904	2024	2	21	14.842	0.0	2024-02-21
14905	2024	2	22	14.823	0.0	2024-02-22

14905 rows × 6 columns

# Data description: summary statistics for every column
n_seaice_clean.describe()

	Year	Month	Day	Extent	Missing	Date
count	14905.000000	14905.000000	14905.000000	14905.000000	14905.0	14905
mean	2002.988796	6.526669	15.734586	11.233564	0.0	2003-06-28 06:35:08.554176384
min	1978.000000	1.000000	1.000000	3.340000	0.0	1978-10-26 00:00:00
25%	1993.000000	4.000000	8.000000	8.475000	0.0	1993-07-16 00:00:00
50%	2003.000000	7.000000	16.000000	11.969000	0.0	2003-09-28 00:00:00
75%	2013.000000	10.000000	23.000000	14.159000	0.0	2013-12-10 00:00:00
max	2024.000000	12.000000	31.000000	16.635000	0.0	2024-02-22 00:00:00
std	12.207257	3.455468	8.800672	3.316977	0.0	NaN

# Average every column within each calendar year (one row per year)
n_seaice_grouped = n_seaice_clean.groupby('Year').mean()
n_seaice_grouped.head()

	Month	Day	Extent	Missing	Date
Year
1978	11.382353	16.617647	12.487000	0.0	1978-11-28
1979	6.527473	15.681319	12.319560	0.0	1979-07-02
1980	6.497268	15.759563	12.334148	0.0	1980-07-01
1981	6.524590	15.759563	12.135486	0.0	1981-07-02
1982	6.527473	15.681319	12.439445	0.0	1982-07-02

# First and last calendar years present in the yearly Arctic table
year_index = n_seaice_grouped.index
yearMin, yearMax = year_index.min(), year_index.max()
print(yearMin, yearMax, sep="\n")

1978
2024

# Discard the first and last years of the series (both are only partly
# covered in the daily table shown above)
n_seaice_grouped = n_seaice_grouped.drop(index=[yearMin, yearMax])

# Plot the yearly mean Arctic sea ice extent
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent')
plt.xlabel('Year')
# Units follow the dataset's Extent column (10^6 sq km)
plt.ylabel('Ice Extent (million sq km)')
plt.title('Arctic Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()

No description has been provided for this image

# Yearly mean Arctic extent together with a least-squares linear trend
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent', color='black')

# Degree-1 polynomial fit of extent against year; z[0] is the slope per year
z = np.polyfit(n_seaice_grouped.index, n_seaice_grouped['Extent'], 1)
p = np.poly1d(z)
plt.plot(n_seaice_grouped.index, p(n_seaice_grouped.index), "b-", label='Linear trend')

plt.xlabel('Year')
plt.ylabel('Ice Extent (million sq km)')
plt.title('Arctic Sea Ice Extent', fontsize=20)
# Build the legend only after both lines exist so the trend line is listed too
plt.legend()
plt.show()

Antarctic¶

# Fetch the Antarctic daily sea ice extent CSV unless a local copy already exists.
# Creating the directory here keeps this cell runnable on its own.
path = pathlib.Path("Datasets")
path.mkdir(exist_ok=True)

s_csv = path / "S_seaice_extent_daily_v3.0.csv"
if s_csv.is_file():
    print("The data is available")
else:
    url = "https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/data/S_seaice_extent_daily_v3.0.csv"
    # Use a timeout so a stalled connection cannot hang the notebook, and
    # raise on HTTP errors so an error page is never saved under the CSV's
    # name (it would be treated as "available" on every later run).
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    s_csv.write_bytes(r.content)

# Load the Antarctic CSV (header on the first line) and preview the first rows
s_seaice = pd.read_csv(pathlib.Path("Datasets") / "S_seaice_extent_daily_v3.0.csv", header=0)
s_seaice.head()

	Year	Month	Day	Extent	Missing	Source Data
0	YYYY	MM	DD	10^6 sq km	10^6 sq km	Source data product web sites: http://nsidc.o...
1	1978	10	26	17.624	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.10.26/nt_19...
2	1978	10	28	17.803	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.10.28/nt_19...
3	1978	10	30	17.670	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.10.30/nt_19...
4	1978	11	01	17.527	0.000	['/ecs/DP1/PM/NSIDC-0051.001/1978.11.01/nt_19...

# Inspect the raw column names (the output shows that several carry leading spaces)
s_seaice.columns

Index(['Year', ' Month', ' Day', '     Extent', '    Missing', ' Source Data'], dtype='object')

# Remove the ' Source Data' column and row 0, which holds the units line
# (YYYY, MM, DD, ...) rather than a measurement.
s_seaice_clean = s_seaice.drop(columns=[' Source Data']).drop(index=[0])
s_seaice_clean.head()

	Year	Month	Day	Extent
1	1978	10	26	17.624
2	1978	10	28	17.803
3	1978	10	30	17.670
4	1978	11	01	17.527
5	1978	11	03	17.486

# Shape of the data (rows, columns)
s_seaice_clean.shape

(14905, 5)

# Column dtypes before conversion (the output shows every column is still object)
s_seaice_clean.dtypes

Year           object
 Month         object
 Day           object
     Extent    object
    Missing    object
dtype: object

# Strip the padding from the column names, then cast Year/Month/Day to
# integers and Extent/Missing to floats. Renaming in place (instead of adding
# new columns and dropping the padded ones) keeps the column order and lets
# the cell be re-run without a KeyError.
s_seaice_clean = (
    s_seaice_clean
    .rename(columns=str.strip)
    .astype({'Year': int, 'Month': int, 'Day': int, 'Extent': float, 'Missing': float})
)
# Combine the three date parts into a single datetime column
s_seaice_clean['Date'] = pd.to_datetime(s_seaice_clean[['Year', 'Month', 'Day']])
s_seaice_clean.head()

	Year	Month	Day	Extent	Date
1	1978	10	26	17.624	1978-10-26
2	1978	10	28	17.803	1978-10-28
3	1978	10	30	17.670	1978-10-30
4	1978	11	1	17.527	1978-11-01
5	1978	11	3	17.486	1978-11-03

# Average every column within each calendar year and look at the latest years
s_seaice_grouped = s_seaice_clean.groupby('Year').mean()
s_seaice_grouped.tail()

	Month	Day	Extent	Missing	Date
Year
2020	6.513661	15.756831	11.602402	0.000000	2020-07-01 12:00:00.000000000
2021	6.526027	15.720548	11.578682	0.000000	2021-07-02 00:00:00.000000000
2022	6.526027	15.720548	10.726003	0.000000	2022-07-01 23:59:59.999999744
2023	6.526027	15.720548	9.852699	0.000164	2023-07-02 00:00:00.000000000
2024	1.415094	14.132075	3.199000	0.004472	2024-01-27 00:00:00.000000000

# First and last calendar years present in the yearly Antarctic table
year_index = s_seaice_grouped.index
yearMin, yearMax = year_index.min(), year_index.max()
print(yearMin, yearMax, sep="\n")

1978
2024

# Discard the first and last years of the series (1978 and 2024 in the
# output above)
s_seaice_grouped = s_seaice_grouped.drop(index=[yearMin, yearMax])

# Plot the yearly mean Antarctic sea ice extent (a plain per-year average;
# no rolling window is applied)
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent')
plt.xlabel('Year')
# Units follow the dataset's Extent column (10^6 sq km)
plt.ylabel('Ice Extent (million sq km)')
plt.title('Antarctic Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()

# Yearly mean Antarctic extent together with a least-squares linear trend
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent', color='black')

# Degree-1 polynomial fit of extent against year; z[0] is the slope per year
z = np.polyfit(s_seaice_grouped.index, s_seaice_grouped['Extent'], 1)
p = np.poly1d(z)
plt.plot(s_seaice_grouped.index, p(s_seaice_grouped.index), "b-", label='Linear trend')

plt.xlabel('Year')
plt.ylabel('Ice Extent (million sq km)')
plt.title('Antarctic Sea Ice Extent', fontsize=20)
# Build the legend only after both lines exist so the trend line is listed too
plt.legend()
plt.show()

# Plot the yearly mean Arctic and Antarctic sea ice extent on shared axes
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent')
plt.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent')
plt.xlabel('Year')
# Units follow the dataset's Extent column (10^6 sq km)
plt.ylabel('Ice Extent (million sq km)')
plt.title('Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()

# Fit and plot a linear trend for each hemisphere against the calendar year.
# The x values are the actual years from the grouped index (not 0..N-1
# positions), so the axis labelled 'Year' shows real years and each slope is
# in extent units per year.

# Linear fit for Antarctic ice extent
x_antarctic = s_seaice_grouped.index.to_numpy()
y_antarctic = s_seaice_grouped['Extent']
coefficients_antarctic = np.polyfit(x_antarctic, y_antarctic, 1)
poly_line_antarctic = np.poly1d(coefficients_antarctic)

# Linear fit for Arctic ice extent
x_arctic = n_seaice_grouped.index.to_numpy()
y_arctic = n_seaice_grouped['Extent']
coefficients_arctic = np.polyfit(x_arctic, y_arctic, 1)
poly_line_arctic = np.poly1d(coefficients_arctic)

# Plotting
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))

# Antarctic sea ice extent and its regression line
plt.plot(x_antarctic, y_antarctic.to_numpy(), label='Antarctic Ice Extent')
plt.plot(x_antarctic, poly_line_antarctic(x_antarctic), label='Antarctic Regression Line', color='blue', linestyle='--')

# Arctic sea ice extent and its regression line
plt.plot(x_arctic, y_arctic.to_numpy(), label='Arctic Ice Extent')
plt.plot(x_arctic, poly_line_arctic(x_arctic), label='Arctic Regression Line', color='orange', linestyle='--')

plt.xlabel('Year')
plt.ylabel('Ice Extent (million sq km)')
plt.title('Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()

Global Mean Sea Level¶

The data for GMSL is from two sources:

US Environmental Protection Agency, from 1880-2014. The data is monthly, instead of daily. It is recorded on the 15th of every month. The columns are: "Time", "GMSL".
European Environment Agency, from 1993-2022. The data obtained is daily. The columns are: "Year", "GSML".

We intend to observe the change in sea level.

# Fetch the sea-level archive unless a local copy exists, then make sure it
# has been extracted.
path = pathlib.Path("Datasets")
path.mkdir(exist_ok=True)

zip_path = path / "sea-level-rise_zip.zip"
extract_dir = path / "sea-level-rise"

if zip_path.is_file():
    print("The data is available")
else:
    url = "https://datahub.io/core/sea-level-rise/r/sea-level-rise_zip.zip"
    # Use a timeout so a stalled connection cannot hang the notebook, and
    # raise on HTTP errors so an error page is never stored as the archive.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    zip_path.write_bytes(r.content)

# Extract whenever the CSV read by the next cell is missing, not only right
# after a download; otherwise a present zip with a deleted or never-extracted
# folder would leave the notebook permanently broken.
if not (extract_dir / "data" / "csiro_alt_gmsl_mo_2015_csv.csv").is_file():
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

# Load the monthly GMSL series from the extracted archive and preview it
sea_level_csv = pathlib.Path("Datasets") / "sea-level-rise" / "data" / "csiro_alt_gmsl_mo_2015_csv.csv"
sea_level = pd.read_csv(sea_level_csv, header=0)
sea_level.head()

	Time	GMSL
0	1993-01-15	-1.6
1	1993-02-15	-3.4
2	1993-03-15	5.5
3	1993-04-15	0.1
4	1993-05-15	5.3

# Column dtypes and non-null counts (the output shows Time is still an object column)
sea_level.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    266 non-null    object 
 1   GMSL    266 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.3+ KB

# Parse the Time strings into real datetimes so the .dt accessor can be used
sea_level = sea_level.assign(Time=pd.to_datetime(sea_level['Time']))
sea_level.head()

	Time	GMSL
0	1993-01-15	-1.6
1	1993-02-15	-3.4
2	1993-03-15	5.5
3	1993-04-15	0.1
4	1993-05-15	5.3

# Summary statistics for the Time and GMSL columns
sea_level.describe()

	Time	GMSL
count	266	266.000000
mean	2004-01-29 21:44:39.699248128	36.028571
min	1993-01-15 00:00:00	-3.500000
25%	1998-07-22 18:00:00	20.525000
50%	2004-01-30 12:00:00	36.700000
75%	2009-08-07 06:00:00	52.500000
max	2015-02-15 00:00:00	79.500000
std	NaN	20.889803

# Average the monthly readings within each calendar year
sample_year = sea_level['Time'].dt.year
sea_level_grouped = sea_level.groupby(sample_year).mean()
sea_level_grouped.head()

	Time	GMSL
Time
1993	1993-06-30 12:00:00	1.408333
1994	1994-06-30 12:00:00	2.733333
1995	1995-06-30 12:00:00	5.750000
1996	1996-06-30 08:00:00	11.341667
1997	1997-06-30 12:00:00	16.050000

# Draw the yearly mean global mean sea level
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
gmsl_by_year = sea_level_grouped['GMSL']
plt.plot(gmsl_by_year.index, gmsl_by_year, label='Sea Level Rise (mm)')
plt.title('Global Mean Sea Level', fontsize=20)
plt.xlabel('Year')
plt.ylabel('Sea Level Rise (mm)')
plt.legend()
plt.show()

# Plot the Arctic and Antarctic ice extent and the global mean sea level in
# one figure. The two quantities have different units (million sq km vs mm),
# so sea level gets its own right-hand y-axis instead of sharing one scale
# that would flatten the ice series.
plt.style.use('seaborn-v0_8')
fig, ax_ice = plt.subplots(figsize=(10, 6))
ax_sea = ax_ice.twinx()

lines = (
    ax_ice.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent')
    + ax_ice.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent')
    # The twin axis restarts the colour cycle, so pick a distinct colour explicitly
    + ax_sea.plot(sea_level_grouped['GMSL'], label='Sea Level Rise (mm)', color='tab:green')
)
# Avoid a second, misaligned set of grid lines from the twin axis
ax_sea.grid(False)

ax_ice.set_xlabel('Year')
ax_ice.set_ylabel('Ice Extent (million sq km)')
ax_sea.set_ylabel('Sea Level Rise (mm)')
ax_ice.set_title('Sea Ice Extent and Sea Level Rise', fontsize=20)
# One combined legend covering the lines from both axes
ax_ice.legend(handles=lines)
plt.show()