Ice Extent over years¶

1979 to 2023¶

The files used provide the total sea ice extent for each day over the entire time period, for both hemispheres. The data was obtained from the National Snow and Ice Data Center, a part of CIRES at the University of Colorado Boulder. We are interested in the columns: "Year", "Month", "Day", "Extent". The column "Extent" has values in millions of square kilometers.

We intend to observe the change in sea ice extent over the given time period. We removed the data for 1978, as there were only two months of data and that is not representative of the whole year; the partial data for 2024 is removed for the same reason.

1
2
3
4
5
6
7
8
9
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import watermark
import requests
import pathlib
import zipfile
sns.set_theme()

Arctic¶

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# Fetch the Arctic daily sea-ice extent CSV unless a local copy already exists.
# Create the Datasets directory first so that neither the existence check nor
# the write below can fail on a missing folder.
path = pathlib.Path("Datasets")
path.mkdir(exist_ok=True)

n_seaice_csv = path / "N_seaice_extent_daily_v3.0.csv"
if n_seaice_csv.exists():
    print("The data is available")
else:
    url = "https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/data/N_seaice_extent_daily_v3.0.csv"
    # The timeout keeps the cell from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being saved under the
    # dataset's name (later runs would then report it as "available").
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    n_seaice_csv.write_bytes(r.content)
1
2
3
# Load the downloaded Arctic CSV as-is and preview its first ten rows.
n_seaice_file = 'Datasets/N_seaice_extent_daily_v3.0.csv'
n_seaice = pd.read_csv(n_seaice_file, header=0)
n_seaice.head(10)
Year Month Day Extent Missing Source Data
0 YYYY MM DD 10^6 sq km 10^6 sq km Source data product web sites: http://nsidc.o...
1 1978 10 26 10.231 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.10.26/nt_19...
2 1978 10 28 10.420 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.10.28/nt_19...
3 1978 10 30 10.557 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.10.30/nt_19...
4 1978 11 01 10.670 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.01/nt_19...
5 1978 11 03 10.777 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.03/nt_19...
6 1978 11 05 10.968 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.05/nt_19...
7 1978 11 07 11.080 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.07/nt_19...
8 1978 11 09 11.189 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.09/nt_19...
9 1978 11 11 11.314 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.11/nt_19...
1
2
# List the column names exactly as parsed; the output shows that every name
# after 'Year' carries leading spaces from the file's padded header.
n_seaice.columns
Index(['Year', ' Month', ' Day', '     Extent', '    Missing', ' Source Data'], dtype='object')
1
2
3
4
5
6
# Discard the ' Source Data' column together with the row labelled 0, which
# holds the units/format descriptions rather than a measurement.
n_seaice_clean = n_seaice.drop(index=[0], columns=[' Source Data'])
n_seaice_clean.head()
Year Month Day Extent Missing
1 1978 10 26 10.231 0.000
2 1978 10 28 10.420 0.000
3 1978 10 30 10.557 0.000
4 1978 11 01 10.670 0.000
5 1978 11 03 10.777 0.000
1
2
# Explore the shape of the data: (number of rows, number of columns)
n_seaice_clean.shape
(14905, 5)
1
2
# Basic information about the data set: column names, non-null counts and
# dtypes (every column is still 'object', i.e. text, at this point)
n_seaice_clean.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14905 entries, 1 to 14905
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Year         14905 non-null  object
 1    Month       14905 non-null  object
 2    Day         14905 non-null  object
 3        Extent  14905 non-null  object
 4       Missing  14905 non-null  object
dtypes: object(5)
memory usage: 582.4+ KB
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
# Normalise the column names and give every column a numeric dtype.
# The raw header pads each name after 'Year' with spaces and every value was
# read as text, so strip the names and cast in one pass: Year/Month/Day become
# integers, Extent/Missing become floats. Renaming (instead of copying to new
# columns and dropping the padded ones) keeps this cell safe to re-run.
n_seaice_clean = (
    n_seaice_clean
    .rename(columns=str.strip)
    .astype({
        'Year': int,
        'Month': int,
        'Day': int,
        'Extent': float,
        'Missing': float,
    })
)
# Combine the three date parts into a single timestamp column.
n_seaice_clean['Date'] = pd.to_datetime(n_seaice_clean[['Year', 'Month', 'Day']])
n_seaice_clean.head()
Year Month Day Extent Missing Date
1 1978 10 26 10.231 0.0 1978-10-26
2 1978 10 28 10.420 0.0 1978-10-28
3 1978 10 30 10.557 0.0 1978-10-30
4 1978 11 1 10.670 0.0 1978-11-01
5 1978 11 3 10.777 0.0 1978-11-03
1
2
# Re-check the dtypes and non-null counts now that the columns have been
# converted to numeric types and the Date column has been added
n_seaice_clean.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14905 entries, 1 to 14905
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   Year     14905 non-null  int64         
 1   Month    14905 non-null  int64         
 2   Day      14905 non-null  int64         
 3   Extent   14905 non-null  float64       
 4   Missing  14905 non-null  float64       
 5   Date     14905 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(2), int64(3)
memory usage: 698.8 KB
1
# Show the cleaned Arctic DataFrame
n_seaice_clean
Year Month Day Extent Missing Date
1 1978 10 26 10.231 0.0 1978-10-26
2 1978 10 28 10.420 0.0 1978-10-28
3 1978 10 30 10.557 0.0 1978-10-30
4 1978 11 1 10.670 0.0 1978-11-01
5 1978 11 3 10.777 0.0 1978-11-03
... ... ... ... ... ... ...
14901 2024 2 18 14.676 0.0 2024-02-18
14902 2024 2 19 14.776 0.0 2024-02-19
14903 2024 2 20 14.846 0.0 2024-02-20
14904 2024 2 21 14.842 0.0 2024-02-21
14905 2024 2 22 14.823 0.0 2024-02-22

14905 rows × 6 columns

1
2
# Data description: summary statistics for every column
n_seaice_clean.describe()
Year Month Day Extent Missing Date
count 14905.000000 14905.000000 14905.000000 14905.000000 14905.0 14905
mean 2002.988796 6.526669 15.734586 11.233564 0.0 2003-06-28 06:35:08.554176384
min 1978.000000 1.000000 1.000000 3.340000 0.0 1978-10-26 00:00:00
25% 1993.000000 4.000000 8.000000 8.475000 0.0 1993-07-16 00:00:00
50% 2003.000000 7.000000 16.000000 11.969000 0.0 2003-09-28 00:00:00
75% 2013.000000 10.000000 23.000000 14.159000 0.0 2013-12-10 00:00:00
max 2024.000000 12.000000 31.000000 16.635000 0.0 2024-02-22 00:00:00
std 12.207257 3.455468 8.800672 3.316977 0.0 NaN
1
2
3
# Collapse the daily records into one row per year by averaging every column.
n_by_year = n_seaice_clean.groupby(by='Year')
n_seaice_grouped = n_by_year.mean()
n_seaice_grouped.head()
Month Day Extent Missing Date
Year
1978 11.382353 16.617647 12.487000 0.0 1978-11-28
1979 6.527473 15.681319 12.319560 0.0 1979-07-02
1980 6.497268 15.759563 12.334148 0.0 1980-07-01
1981 6.524590 15.759563 12.135486 0.0 1981-07-02
1982 6.527473 15.681319 12.439445 0.0 1982-07-02
1
2
3
4
5
# First and last year present in the yearly Arctic table
yearMin, yearMax = n_seaice_grouped.index.min(), n_seaice_grouped.index.max()
print(yearMin, yearMax, sep="\n")
1978
2024
1
2
3
# Drop the first and last years (1978 and 2024 in the output above): the daily
# records start in late October 1978 and stop in February 2024, so their
# yearly means are not comparable with those of complete years.
n_seaice_grouped = n_seaice_grouped.drop(index=[yearMin, yearMax])
1
2
3
4
5
6
7
8
9
# Plot the annual-mean Arctic sea-ice extent against year.
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent')
plt.xlabel('Year')
# Extent is reported in units of 10^6 square kilometres.
plt.ylabel('Ice Extent (million sq km)')
plt.title('Arctic Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()
No description has been provided for this image
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
# Plot the annual-mean Arctic sea-ice extent together with a linear trend.
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent', color='black')
plt.xlabel('Year')
# Extent is reported in units of 10^6 square kilometres.
plt.ylabel('Ice Extent (million sq km)')
plt.title('Arctic Sea Ice Extent', fontsize=20)

# Least-squares straight line (degree-1 polynomial) of extent against year.
z = np.polyfit(n_seaice_grouped.index, n_seaice_grouped['Extent'], 1)
p = np.poly1d(z)
plt.plot(n_seaice_grouped.index, p(n_seaice_grouped.index), "b-", label='Linear Trend')

# Build the legend only after every line is drawn so the trend line is listed.
plt.legend()
plt.show()
No description has been provided for this image

Antarctic¶

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Fetch the Antarctic daily sea-ice extent CSV unless a local copy already
# exists. The directory is created here as well so this cell does not depend
# on the Arctic download cell having been run first.
path = pathlib.Path("Datasets")
path.mkdir(exist_ok=True)

s_seaice_csv = path / "S_seaice_extent_daily_v3.0.csv"
if s_seaice_csv.exists():
    print("The data is available")
else:
    url = "https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/data/S_seaice_extent_daily_v3.0.csv"
    # The timeout keeps the cell from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being saved under the
    # dataset's name (later runs would then report it as "available").
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    s_seaice_csv.write_bytes(r.content)
1
2
3
# Load the downloaded Antarctic CSV as-is and preview its first rows.
s_seaice_file = 'Datasets/S_seaice_extent_daily_v3.0.csv'
s_seaice = pd.read_csv(s_seaice_file, header=0)
s_seaice.head()
Year Month Day Extent Missing Source Data
0 YYYY MM DD 10^6 sq km 10^6 sq km Source data product web sites: http://nsidc.o...
1 1978 10 26 17.624 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.10.26/nt_19...
2 1978 10 28 17.803 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.10.28/nt_19...
3 1978 10 30 17.670 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.10.30/nt_19...
4 1978 11 01 17.527 0.000 ['/ecs/DP1/PM/NSIDC-0051.001/1978.11.01/nt_19...
1
2
# List the column names exactly as parsed; the output shows that every name
# after 'Year' carries leading spaces from the file's padded header.
s_seaice.columns
Index(['Year', ' Month', ' Day', '     Extent', '    Missing', ' Source Data'], dtype='object')
1
2
3
4
5
# Discard the ' Source Data' column together with the row labelled 0, which
# holds the units/format descriptions rather than a measurement.
s_seaice_clean = s_seaice.drop(index=[0], columns=[' Source Data'])
s_seaice_clean.head()
Year Month Day Extent Missing
1 1978 10 26 17.624 0.000
2 1978 10 28 17.803 0.000
3 1978 10 30 17.670 0.000
4 1978 11 01 17.527 0.000
5 1978 11 03 17.486 0.000
1
2
# Shape of the data: (number of rows, number of columns)
s_seaice_clean.shape
(14905, 5)
1
2
# Data type of each column (all still 'object', i.e. text, at this point)
s_seaice_clean.dtypes
Year           object
 Month         object
 Day           object
     Extent    object
    Missing    object
dtype: object
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Normalise the column names and give every column a numeric dtype.
# The raw header pads each name after 'Year' with spaces and every value was
# read as text, so strip the names and cast in one pass: Year/Month/Day become
# integers, Extent/Missing become floats. Renaming (instead of copying to new
# columns and dropping the padded ones) keeps this cell safe to re-run.
s_seaice_clean = (
    s_seaice_clean
    .rename(columns=str.strip)
    .astype({
        'Year': int,
        'Month': int,
        'Day': int,
        'Extent': float,
        'Missing': float,
    })
)
# Combine the three date parts into a single timestamp column.
s_seaice_clean['Date'] = pd.to_datetime(s_seaice_clean[['Year', 'Month', 'Day']])
s_seaice_clean.head()
Year Month Day Extent Missing Date
1 1978 10 26 17.624 0.0 1978-10-26
2 1978 10 28 17.803 0.0 1978-10-28
3 1978 10 30 17.670 0.0 1978-10-30
4 1978 11 1 17.527 0.0 1978-11-01
5 1978 11 3 17.486 0.0 1978-11-03
1
2
3
4
# Collapse the daily records into one row per year by averaging every column,
# then look at the most recent years.
s_by_year = s_seaice_clean.groupby(by='Year')
s_seaice_grouped = s_by_year.mean()
s_seaice_grouped.tail()
Month Day Extent Missing Date
Year
2020 6.513661 15.756831 11.602402 0.000000 2020-07-01 12:00:00.000000000
2021 6.526027 15.720548 11.578682 0.000000 2021-07-02 00:00:00.000000000
2022 6.526027 15.720548 10.726003 0.000000 2022-07-01 23:59:59.999999744
2023 6.526027 15.720548 9.852699 0.000164 2023-07-02 00:00:00.000000000
2024 1.415094 14.132075 3.199000 0.004472 2024-01-27 00:00:00.000000000
1
2
3
4
# First and last year present in the yearly Antarctic table
yearMin, yearMax = s_seaice_grouped.index.min(), s_seaice_grouped.index.max()
print(yearMin, yearMax, sep="\n")
1978
2024
1
2
3
# Drop 1978 and 2024 (the first and last years found above): both are only
# partially covered by the daily records, so their yearly means are not
# comparable with those of complete years.
s_seaice_grouped = s_seaice_grouped.drop(index=[yearMin, yearMax])
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Plot the annual-mean Antarctic sea-ice extent against year.
# (These are per-year averages from the groupby above, not a rolling mean.)
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent')
plt.xlabel('Year')
# Extent is reported in units of 10^6 square kilometres.
plt.ylabel('Ice Extent (million sq km)')
plt.title('Antarctic Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()
No description has been provided for this image
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
# Plot the annual-mean Antarctic sea-ice extent together with a linear trend.
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent', color='black')
plt.xlabel('Year')
# Extent is reported in units of 10^6 square kilometres.
plt.ylabel('Ice Extent (million sq km)')
plt.title('Antarctic Sea Ice Extent', fontsize=20)

# Least-squares straight line (degree-1 polynomial) of extent against year.
z = np.polyfit(s_seaice_grouped.index, s_seaice_grouped['Extent'], 1)
p = np.poly1d(z)
plt.plot(s_seaice_grouped.index, p(s_seaice_grouped.index), "b-", label='Linear Trend')

# Build the legend only after every line is drawn so the trend line is listed.
plt.legend()
plt.show()
No description has been provided for this image
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
# Plot the annual-mean Arctic and Antarctic sea-ice extent on one figure.
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))
plt.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent')
plt.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent')
plt.xlabel('Year')
# Extent is reported in units of 10^6 square kilometres.
plt.ylabel('Ice Extent (million sq km)')
plt.title('Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()
No description has been provided for this image
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Fit and plot a straight-line trend for both hemispheres on one figure.
# The calendar year (the index of each yearly table) is used as x, so the
# axis labelled 'Year' shows real years instead of row positions 0..N-1.

# Fit a linear regression line for Antarctic ice extent
x_antarctic = s_seaice_grouped.index.to_numpy()
y_antarctic = s_seaice_grouped['Extent']
coefficients_antarctic = np.polyfit(x_antarctic, y_antarctic, 1)
poly_line_antarctic = np.poly1d(coefficients_antarctic)

# Fit a linear regression line for Arctic ice extent
x_arctic = n_seaice_grouped.index.to_numpy()
y_arctic = n_seaice_grouped['Extent']
coefficients_arctic = np.polyfit(x_arctic, y_arctic, 1)
poly_line_arctic = np.poly1d(coefficients_arctic)

# Plotting
plt.style.use('seaborn-v0_8')
plt.figure(figsize=(10, 6))

# Plot Antarctic sea ice extent and regression line
plt.plot(x_antarctic, y_antarctic, label='Antarctic Ice Extent')
plt.plot(x_antarctic, poly_line_antarctic(x_antarctic), label='Antarctic Regression Line', color='blue', linestyle='--')

# Plot Arctic sea ice extent and regression line
plt.plot(x_arctic, y_arctic, label='Arctic Ice Extent')
plt.plot(x_arctic, poly_line_arctic(x_arctic), label='Arctic Regression Line', color='orange', linestyle='--')

plt.xlabel('Year')
# Extent is reported in units of 10^6 square kilometres.
plt.ylabel('Ice Extent (million sq km)')
plt.title('Sea Ice Extent', fontsize=20)
plt.legend()
plt.show()
No description has been provided for this image

Global Mean Sea Level¶

The data for GMSL is from two sources:

  1. US Environmental Protection Agency, from 1880-2014. The data is monthly, instead of daily. It is recorded on the 15th of every month. The columns are: "Time", "GMSL".

  2. European Environment Agency, from 1993-2022. The data obtained is daily. The columns are: "Year", "GMSL".

We intend to observe the change in sea level.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# Fetch and unpack the sea-level-rise archive unless it is already available.
# The directory is created here so this cell does not depend on an earlier
# download cell having been run first.
path = pathlib.Path("Datasets")
path.mkdir(exist_ok=True)

sea_level_zip = path / "sea-level-rise_zip.zip"
sea_level_dir = path / "sea-level-rise"

downloaded = False
if sea_level_zip.exists():
    print("The data is available")
else:
    url = "https://datahub.io/core/sea-level-rise/r/sea-level-rise_zip.zip"
    # The timeout keeps the cell from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being saved as the zip.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    sea_level_zip.write_bytes(r.content)
    downloaded = True

# Unpack after a fresh download, and also when the archive is present but the
# extracted folder is missing (e.g. a previous run stopped before unzipping).
if downloaded or not sea_level_dir.exists():
    with zipfile.ZipFile(sea_level_zip, 'r') as zip_ref:
        zip_ref.extractall(sea_level_dir)
1
2
3
4
# Load the monthly global-mean-sea-level table from the extracted archive and
# preview its first rows.
sea_level_file = 'Datasets/sea-level-rise/data/csiro_alt_gmsl_mo_2015_csv.csv'
sea_level = pd.read_csv(sea_level_file, header=0)
sea_level.head()
Time GMSL
0 1993-01-15 -1.6
1 1993-02-15 -3.4
2 1993-03-15 5.5
3 1993-04-15 0.1
4 1993-05-15 5.3
1
# Column names, non-null counts and dtypes ('Time' is still text here)
sea_level.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    266 non-null    object 
 1   GMSL    266 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.3+ KB
1
2
3
4
# Convert the 'Time' column from text to datetime so that date parts such as
# the year can be extracted from it

sea_level['Time'] = pd.to_datetime(sea_level['Time'])
sea_level.head()
Time GMSL
0 1993-01-15 -1.6
1 1993-02-15 -3.4
2 1993-03-15 5.5
3 1993-04-15 0.1
4 1993-05-15 5.3
1
# Summary statistics for the 'Time' and 'GMSL' columns
sea_level.describe()
Time GMSL
count 266 266.000000
mean 2004-01-29 21:44:39.699248128 36.028571
min 1993-01-15 00:00:00 -3.500000
25% 1998-07-22 18:00:00 20.525000
50% 2004-01-30 12:00:00 36.700000
75% 2009-08-07 06:00:00 52.500000
max 2015-02-15 00:00:00 79.500000
std NaN 20.889803
1
2
3
4
# Average the monthly readings within each calendar year.
sea_level_year = sea_level['Time'].dt.year
sea_level_grouped = sea_level.groupby(sea_level_year).mean()
sea_level_grouped.head()
Time GMSL
Time
1993 1993-06-30 12:00:00 1.408333
1994 1994-06-30 12:00:00 2.733333
1995 1995-06-30 12:00:00 5.750000
1996 1996-06-30 08:00:00 11.341667
1997 1997-06-30 12:00:00 16.050000
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Draw the yearly mean sea level as a single line chart.
plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sea_level_grouped['GMSL'], label='Sea Level Rise (mm)')
ax.set_xlabel('Year')
ax.set_ylabel('Sea Level Rise (mm)')
ax.set_title('Global Mean Sea Level', fontsize=20)
ax.legend()
plt.show()
No description has been provided for this image
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Plot the Arctic and Antarctic ice extent and the global mean sea level in
# one figure. The two quantities have different units (million sq km vs mm),
# so the sea level gets its own y axis on the right; on a single shared axis
# the much larger sea-level values would flatten the ice-extent curves.
plt.style.use('seaborn-v0_8')
fig, ax_ice = plt.subplots(figsize=(10, 6))

# Left axis: sea-ice extent for both hemispheres.
ax_ice.plot(s_seaice_grouped['Extent'], label='Antarctic Ice Extent')
ax_ice.plot(n_seaice_grouped['Extent'], label='Arctic Ice Extent')
ax_ice.set_xlabel('Year')
ax_ice.set_ylabel('Ice Extent (million sq km)')

# Right axis: sea level. A twin axis restarts the colour cycle, so request the
# third cycle colour explicitly to keep the line distinct from the ice lines.
ax_level = ax_ice.twinx()
ax_level.plot(sea_level_grouped['GMSL'], label='Sea Level Rise (mm)', color='C2')
ax_level.set_ylabel('Sea Level Rise (mm)')
ax_level.grid(False)  # avoid a second, misaligned grid over the first one

# Merge the entries of both axes into a single legend.
handles_ice, labels_ice = ax_ice.get_legend_handles_labels()
handles_level, labels_level = ax_level.get_legend_handles_labels()
ax_level.legend(handles_ice + handles_level, labels_ice + labels_level)

ax_ice.set_title('Sea Ice Extent and Sea Level Rise', fontsize=20)
plt.show()
No description has been provided for this image