Network Stock Portfolio Optimization¶


Context and Problem Statement¶

Active investing in the asset management industry aims to beat the stock market's average returns: portfolio managers track a particular index and try to outperform it by constructing their own portfolios.

Portfolio construction involves selecting stocks that have a higher probability of delivering better returns than the tracking index, such as the S&P 500. In this project, we will use network analysis to select a basket of stocks and create two portfolios. We will then simulate portfolio value by investing a fixed amount, holding the portfolio for an entire year, and comparing the result against the S&P 500 index.

In this project, we will follow the approach described in the research paper below:

Dynamic portfolio strategy using a clustering approach

Proposed Approach¶

  • Collect the price data for all S&P 500 components from 2011 through 2020
  • Compute log returns for the S&P 500 components over the same time period
  • Compute the correlation matrix for the above log returns
  • Identify the top n central and peripheral stocks based on the following network topological parameters:
    • Degree centrality
    • Betweenness centrality
    • Distance on degree criterion
    • Distance on correlation criterion
    • Distance on distance criterion
  • Simulate the performance of central and peripheral portfolios against the performance of S&P 500 for the year 2021
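
As a rough sketch of this pipeline, the following runs the same steps end to end on synthetic random-walk prices (the tickers `S1`–`S5` and all numbers are made up, not real S&P 500 data):

```python
import numpy as np
import pandas as pd
import networkx as nx

# Synthetic random-walk prices standing in for the real S&P 500 download
rng = np.random.default_rng(0)
prices = pd.DataFrame(
    100 * np.exp(np.cumsum(rng.normal(0, 0.01, size=(250, 5)), axis=0)),
    columns=['S1', 'S2', 'S3', 'S4', 'S5'])

log_returns = np.log(prices).diff().dropna()    # daily log returns
corr = log_returns.corr()                       # correlation matrix
dist = np.sqrt(2 * (1 - corr))                  # correlation -> distance
mst = nx.minimum_spanning_tree(nx.Graph(dist))  # filter the network with an MST
central = nx.degree_centrality(mst)             # rank stocks by centrality
print(sorted(central, key=central.get, reverse=True))
```

With 5 stocks the MST keeps only 4 of the 10 possible edges; on the full data the same steps reduce the dense web of pairwise correlations to a sparse tree of significant links.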

Loading the Libraries¶

We first need to install the pandas_datareader library using !pip install pandas_datareader

In [2]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [ ]:
!pip install pandas_datareader
Requirement already satisfied: pandas_datareader in /usr/local/lib/python3.10/dist-packages (0.10.0)
Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from pandas_datareader) (4.9.4)
Requirement already satisfied: pandas>=0.23 in /usr/local/lib/python3.10/dist-packages (from pandas_datareader) (2.2.2)
Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pandas_datareader) (2.32.3)
Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23->pandas_datareader) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23->pandas_datareader) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23->pandas_datareader) (2024.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.23->pandas_datareader) (2024.2)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pandas_datareader) (3.4.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pandas_datareader) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pandas_datareader) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pandas_datareader) (2024.8.30)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=0.23->pandas_datareader) (1.16.0)
In [ ]:
import tqdm                               # Used to display progress bars
import requests                           # Create HTTP requests to interact with APIs
import numpy as np
import pandas as pd
import seaborn as sns
import networkx as nx                     # Network analysis
import plotly.express as px               # High-level module in the plotly library for creating interactive visualizations
from bs4 import BeautifulSoup             # Webscraper
import matplotlib.pyplot as plt
import pandas_datareader.data as web      # Data reader

import warnings
warnings.filterwarnings('ignore')

Getting the S&P 500 Components¶

Beautiful Soup is a library that makes it easy to scrape information from web pages.

https://www.crummy.com/software/BeautifulSoup/bs4/doc/

In [ ]:
'''
# Original code - has an error DO NOT RUN
#Extracting list of S&P 500 companies using BeautifulSoup.
resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})
tickers = []
for row in table.findAll('tr')[1:]:
    ticker = row.findAll('td')[0].text.strip('\n')
    tickers.append(ticker)

tickers = [ticker.replace('.', '-') for ticker in tickers] # list of S&P 500 stocks
'''
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-6-f29fe04de8da> in <cell line: 6>()
      5 tickers = []
      6 for row in table.findAll('tr')[1:]:
----> 7     ticker = row.findAll('td')[0].text.strip('\n')
      8     tickers.append(ticker)
      9 

IndexError: list index out of range
In [ ]:
import requests
from bs4 import BeautifulSoup

# Extracting list of S&P 500 companies using BeautifulSoup.
resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'id': 'constituents'})  # Using the table ID to be more specific
tickers = []

# Loop through each row in the table (skip the header row)
for row in table.findAll('tr')[1:]:
    tds = row.findAll('td')  # Get all 'td' elements in the row
    if len(tds) > 0:         # Ensure the row contains 'td' elements
        ticker = tds[0].text.strip()  # Extract the first 'td' element text
        tickers.append(ticker)

# Replace '.' with '-' to standardize ticker symbols
tickers = [ticker.replace('.', '-') for ticker in tickers]

print(tickers[:10])
['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']

Getting the Price Data for all the S&P 500 components in the last 10 years¶

In [ ]:
# price_data = web.DataReader(tickers, 'yahoo', start='2011-01-01', end='2020-12-31')  # download from Yahoo Finance
# price_data = price_data['Adj Close']    # keep only the adjusted close; the download also includes open, high, low, close, and volume
# price_data.to_csv('snp500_price_data_2011_to_2020.csv')
In [ ]:
price_data = pd.read_csv('/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Eight_-_Networking_and_Graphical_Models/Case_Studies/Network_Stock_Portfolio_Optimization/snp500_price_data_2011_to_2020.csv', index_col=[0])
In [ ]:
price_data.head()
Out[ ]:
MMM AOS ABT ABBV ABMD ACN ATVI ADM ADBE ADP ... XEL XLNX XYL YUM ZBRA ZBH ZION ZTS CEG OGN
Date
2010-12-31 63.855606 8.113162 17.986767 NaN 9.61 39.143620 11.245819 22.385578 30.780001 31.271172 ... 16.221039 23.216919 NaN 28.547478 37.990002 49.212429 21.089169 NaN NaN NaN
2011-01-03 64.218163 8.125947 17.952976 NaN 9.80 39.224346 11.318138 22.623722 31.290001 31.791464 ... 16.227924 23.569420 NaN 28.570745 38.200001 50.395058 21.907326 NaN NaN NaN
2011-01-04 64.129395 8.100383 18.121916 NaN 9.80 38.966022 11.327178 22.608845 31.510000 31.676586 ... 16.296804 23.665552 NaN 28.134232 37.840000 49.725819 21.550467 NaN NaN NaN
2011-01-05 64.129395 8.285738 18.121916 NaN 10.03 38.974094 11.110217 22.713020 32.220001 32.183357 ... 16.200367 23.745670 NaN 28.268110 37.799999 49.762482 21.672325 NaN NaN NaN
2011-01-06 63.737186 8.289999 18.084377 NaN 10.05 39.119389 11.083097 23.583750 32.270000 32.433369 ... 16.186602 24.146229 NaN 28.465996 37.480000 48.222305 21.611391 NaN NaN NaN

5 rows × 505 columns

Missing Data due to Index Rebalancing¶

The set of companies listed in the S&P 500 is always changing; this is called index rebalancing.

In [ ]:
figure = plt.figure(figsize=(24, 8))
sns.heatmap(price_data.T.isnull());

The missing data is due to the fact that certain stocks may move out of the S&P 500 and certain stocks may enter the S&P 500 in this respective timeframe.

In [ ]:
price_data_cleaned = price_data.dropna(axis=1) # dropping na values columnwise
In [ ]:
figure = plt.figure(figsize=(16, 8))
sns.heatmap(price_data_cleaned.T.isnull());

The null values have been removed; the heatmap confirms that the cleaned data contains no missing values.

Getting Yearwise Data¶

We split the data into individual years so that we can examine each year in more detail.

In [ ]:
def get_year_wise_snp_500_data(data, year):
    year_wise_data = data.loc['{}-01-01'.format(year):'{}-12-31'.format(year)]

    return year_wise_data
In [ ]:
# Getting year wise data of S&P stocks from 2011 to 2020
snp_500_2011 = get_year_wise_snp_500_data(price_data_cleaned, 2011)
snp_500_2012 = get_year_wise_snp_500_data(price_data_cleaned, 2012)
snp_500_2013 = get_year_wise_snp_500_data(price_data_cleaned, 2013)
snp_500_2014 = get_year_wise_snp_500_data(price_data_cleaned, 2014)
snp_500_2015 = get_year_wise_snp_500_data(price_data_cleaned, 2015)
snp_500_2016 = get_year_wise_snp_500_data(price_data_cleaned, 2016)
snp_500_2017 = get_year_wise_snp_500_data(price_data_cleaned, 2017)
snp_500_2018 = get_year_wise_snp_500_data(price_data_cleaned, 2018)
snp_500_2019 = get_year_wise_snp_500_data(price_data_cleaned, 2019)
snp_500_2020 = get_year_wise_snp_500_data(price_data_cleaned, 2020)
In [ ]:
snp_500_2011
Out[ ]:
MMM AOS ABT ABMD ACN ATVI ADM ADBE ADP AAP ... WHR WMB WTW WYNN XEL XLNX YUM ZBRA ZBH ZION
Date
2011-01-03 64.218163 8.125947 17.952976 9.800000 39.224346 11.318138 22.623722 31.290001 31.791464 62.732765 ... 67.926483 11.205445 93.139076 77.632828 16.227924 23.569420 28.570745 38.200001 50.395058 21.907326
2011-01-04 64.129395 8.100383 18.121916 9.800000 38.966022 11.327178 22.608845 31.510000 31.676586 59.610516 ... 66.950417 11.123855 91.576157 80.054619 16.296804 23.665552 28.134232 37.840000 49.725819 21.550467
2011-01-05 64.129395 8.285738 18.121916 10.030000 38.974094 11.110217 22.713020 32.220001 32.183357 59.687115 ... 67.400887 11.141987 92.847679 81.087433 16.200367 23.745670 28.268110 37.799999 49.762482 21.672325
2011-01-06 63.737186 8.289999 18.084377 10.050000 39.119389 11.083097 23.583750 32.270000 32.433369 57.723724 ... 65.966843 11.119320 93.112579 81.678650 16.186602 24.146229 28.465996 37.480000 48.222305 21.611391
2011-01-07 63.803787 8.409311 18.159452 9.890000 39.183979 10.938456 23.777237 32.040001 32.507687 59.265705 ... 65.689018 11.296103 92.953644 84.570557 16.331245 24.010040 28.821018 37.599998 48.213131 21.385098
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2011-12-23 62.401131 8.806977 21.821796 18.469999 43.558022 11.196037 22.084383 28.290001 37.792553 67.537323 ... 39.581589 15.226687 103.337746 82.537613 19.524267 26.499556 35.055527 36.520000 48.781536 14.242986
2011-12-27 62.461864 8.845891 21.903597 18.360001 43.590965 11.168506 22.069183 28.500000 37.869064 68.181442 ... 36.047947 15.277890 103.761589 85.186325 19.825743 26.450407 35.215870 36.590000 48.845718 14.304041
2011-12-28 61.604034 8.612420 21.747778 18.250000 43.533318 11.131798 21.560009 28.020000 37.423878 67.546921 ... 35.854633 14.956691 102.516556 81.952354 19.710886 26.188274 35.025822 35.700001 48.735703 14.033657
2011-12-29 62.332783 8.837245 21.942547 18.379999 44.340408 11.287809 21.841192 28.309999 37.806461 67.633461 ... 36.589203 15.152204 102.966888 82.792717 19.890350 26.409443 35.382153 35.980000 48.992397 14.373814
2011-12-30 62.044331 8.672951 21.903597 18.469999 43.838036 11.306164 21.734802 28.270000 37.569965 66.941238 ... 36.689724 15.370993 102.781456 82.905281 19.840096 26.262003 35.043640 35.779999 48.974060 14.199376

252 rows × 450 columns

We shift the data so that each row shows the previous day's closing price. We do this because we are interested in daily returns; e.g., if a stock is purchased first thing in the morning and sold last thing at night, that difference is the daily return.
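
As a tiny illustration with made-up prices, `shift(1)` moves each value down one row, so subtracting gives the day-over-day change (the first row becomes NaN, just as in the shifted table below):

```python
import pandas as pd

# Hypothetical three-day price series
prices = pd.Series([100.0, 102.0, 101.0],
                   index=pd.to_datetime(['2011-01-03', '2011-01-04', '2011-01-05']))
yesterday = prices.shift(1)          # yesterday's close aligned with today's row
daily_change = prices - yesterday    # NaN, +2.0, -1.0
print(daily_change)
```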

In [ ]:
snp_500_2011.shift(1)
Out[ ]:
MMM AOS ABT ABMD ACN ATVI ADM ADBE ADP AAP ... WHR WMB WTW WYNN XEL XLNX YUM ZBRA ZBH ZION
Date
2011-01-03 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2011-01-04 64.218163 8.125947 17.952976 9.800000 39.224346 11.318138 22.623722 31.290001 31.791464 62.732765 ... 67.926483 11.205445 93.139076 77.632828 16.227924 23.569420 28.570745 38.200001 50.395058 21.907326
2011-01-05 64.129395 8.100383 18.121916 9.800000 38.966022 11.327178 22.608845 31.510000 31.676586 59.610516 ... 66.950417 11.123855 91.576157 80.054619 16.296804 23.665552 28.134232 37.840000 49.725819 21.550467
2011-01-06 64.129395 8.285738 18.121916 10.030000 38.974094 11.110217 22.713020 32.220001 32.183357 59.687115 ... 67.400887 11.141987 92.847679 81.087433 16.200367 23.745670 28.268110 37.799999 49.762482 21.672325
2011-01-07 63.737186 8.289999 18.084377 10.050000 39.119389 11.083097 23.583750 32.270000 32.433369 57.723724 ... 65.966843 11.119320 93.112579 81.678650 16.186602 24.146229 28.465996 37.480000 48.222305 21.611391
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2011-12-23 61.467400 8.679438 21.677664 18.520000 43.302727 10.920726 21.810799 27.889999 37.444744 66.700890 ... 39.094479 15.007899 101.907288 81.066956 19.380699 26.450407 34.675449 36.380001 48.625694 14.085989
2011-12-27 62.401131 8.806977 21.821796 18.469999 43.558022 11.196037 22.084383 28.290001 37.792553 67.537323 ... 39.581589 15.226687 103.337746 82.537613 19.524267 26.499556 35.055527 36.520000 48.781536 14.242986
2011-12-28 62.461864 8.845891 21.903597 18.360001 43.590965 11.168506 22.069183 28.500000 37.869064 68.181442 ... 36.047947 15.277890 103.761589 85.186325 19.825743 26.450407 35.215870 36.590000 48.845718 14.304041
2011-12-29 61.604034 8.612420 21.747778 18.250000 43.533318 11.131798 21.560009 28.020000 37.423878 67.546921 ... 35.854633 14.956691 102.516556 81.952354 19.710886 26.188274 35.025822 35.700001 48.735703 14.033657
2011-12-30 62.332783 8.837245 21.942547 18.379999 44.340408 11.287809 21.841192 28.309999 37.806461 67.633461 ... 36.589203 15.152204 102.966888 82.792717 19.890350 26.409443 35.382153 35.980000 48.992397 14.373814

252 rows × 450 columns

Computing the Daily Log Returns¶

Statistically, stock prices are commonly assumed to follow a log-normal distribution, which means that log returns are approximately normally distributed. It is therefore plausible to use properties of the normal distribution in statistical estimation for log returns, but not for simple returns.

Stock return analysis is a time series analysis, in which stationarity also matters; log returns are typically close to stationary, whereas raw prices are not.
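
One useful consequence, shown here on a hypothetical price path, is that log returns are additive over time: the daily log returns sum exactly to the log return over the whole period, which is not true of simple returns.

```python
import numpy as np
import pandas as pd

# Hypothetical four-day price path
prices = pd.Series([100.0, 110.0, 99.0, 105.0])
log_ret = np.log(prices).diff().dropna()                 # daily log returns
total_from_daily = log_ret.sum()                         # sum of daily log returns
total_direct = np.log(prices.iloc[-1] / prices.iloc[0])  # log return over the full period
print(np.isclose(total_from_daily, total_direct))        # True: log returns are additive
```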

In [ ]:
# Calculating daily log returns by differencing consecutive days with the shift function.
# Note: log(P_{t-1}) - log(P_t) is the negative of the usual log return; the sign flip
# affects every return equally and leaves the pairwise correlations below unchanged.
log_returns_2011 = np.log(snp_500_2011.shift(1)) - np.log(snp_500_2011)
log_returns_2012 = np.log(snp_500_2012.shift(1)) - np.log(snp_500_2012)
log_returns_2013 = np.log(snp_500_2013.shift(1)) - np.log(snp_500_2013)
log_returns_2014 = np.log(snp_500_2014.shift(1)) - np.log(snp_500_2014)
log_returns_2015 = np.log(snp_500_2015.shift(1)) - np.log(snp_500_2015)
log_returns_2016 = np.log(snp_500_2016.shift(1)) - np.log(snp_500_2016)
log_returns_2017 = np.log(snp_500_2017.shift(1)) - np.log(snp_500_2017)
log_returns_2018 = np.log(snp_500_2018.shift(1)) - np.log(snp_500_2018)
log_returns_2019 = np.log(snp_500_2019.shift(1)) - np.log(snp_500_2019)
log_returns_2020 = np.log(snp_500_2020.shift(1)) - np.log(snp_500_2020)

Computing the Correlation of Returns¶

In [ ]:
# Computing the correlation matrices (used later as adjacency matrices):
return_correlation_2011 = log_returns_2011.corr()
return_correlation_2012 = log_returns_2012.corr()
return_correlation_2013 = log_returns_2013.corr()
return_correlation_2014 = log_returns_2014.corr()
return_correlation_2015 = log_returns_2015.corr()
return_correlation_2016 = log_returns_2016.corr()
return_correlation_2017 = log_returns_2017.corr()
return_correlation_2018 = log_returns_2018.corr()
return_correlation_2019 = log_returns_2019.corr()
return_correlation_2020 = log_returns_2020.corr()
In [ ]:
figure, axes = plt.subplots(5, 2, figsize=(30, 30))
sns.heatmap(return_correlation_2011, ax=axes[0, 0]);
sns.heatmap(return_correlation_2012, ax=axes[0, 1]);
sns.heatmap(return_correlation_2013, ax=axes[1, 0]);
sns.heatmap(return_correlation_2014, ax=axes[1, 1]);
sns.heatmap(return_correlation_2015, ax=axes[2, 0]);
sns.heatmap(return_correlation_2016, ax=axes[2, 1]);
sns.heatmap(return_correlation_2017, ax=axes[3, 0]);
sns.heatmap(return_correlation_2018, ax=axes[3, 1]);
sns.heatmap(return_correlation_2019, ax=axes[4, 0]);
sns.heatmap(return_correlation_2020, ax=axes[4, 1]);

Inferences¶

The first plot, for the year 2011, shows high correlation among the stocks. In 2011 there was a market downturn and considerable volatility, so stock prices tended to fall together with the rest of the market; this co-movement is the reason for the high correlations.

Similarly, in 2012, 2014, and 2017 the market was relatively stable, and hence the correlation among stocks is low.

In 2020, due to the COVID-19 pandemic and the resulting volatility, stock prices moved up or down together with the rest of the market, again producing high correlations.

From this we can infer that in stable market conditions the correlation matrix contains low correlation values, whereas in turbulent market conditions it contains high correlation values.

Creating Graphs¶

In [ ]:
graph_2011 = nx.Graph(return_correlation_2011)
In [ ]:
figure = plt.figure(figsize=(22, 10))
nx.draw_networkx(graph_2011, with_labels=False)

This is a fully connected network because we created it from the correlation matrix: every node is connected to every other node, and, since the diagonal of the correlation matrix is 1, every node also has a self-loop.
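
A minimal check of this behaviour, using a hypothetical 3×3 correlation matrix: `nx.Graph` builds an edge for every nonzero entry, so the off-diagonal entries give a complete graph and the diagonal of 1s gives a self-loop on every node.

```python
import pandas as pd
import networkx as nx

# Hypothetical 3x3 correlation matrix
corr = pd.DataFrame([[1.0, 0.5, 0.2],
                     [0.5, 1.0, 0.3],
                     [0.2, 0.3, 1.0]],
                    index=list('ABC'), columns=list('ABC'))
g = nx.Graph(corr)
print(g.number_of_edges())            # 3 pairwise edges + 3 self-loops = 6
print(sorted(nx.selfloop_edges(g)))   # [('A', 'A'), ('B', 'B'), ('C', 'C')]
```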

Filtering Graphs using MST¶

MST - Minimum Spanning Tree

A minimum spanning tree (MST) or minimum weight spanning tree is a subset of the edges of a connected, edge-weighted undirected graph that connects all the vertices together, without any cycles and with the minimum possible total edge weight. That is, it is a spanning tree whose sum of edge weights is as small as possible.

MST is one of the popular techniques to eliminate redundancy and noise while maintaining the significant links in the network.

While removing redundancy and noise in the data using MST, we might lose some information as well.

You can find more on MST here
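
A toy example of this filtering step (hypothetical nodes and weights): the MST keeps the cheapest set of edges that still connects everything, dropping the redundant heavy edge.

```python
import networkx as nx

# Hypothetical weighted graph with one cycle A-B-C
g = nx.Graph()
g.add_weighted_edges_from([('A', 'B', 1.0), ('B', 'C', 2.0),
                           ('A', 'C', 2.5), ('C', 'D', 0.5)])
mst = nx.minimum_spanning_tree(g)
# The A-C edge (the heaviest edge in the cycle) is dropped; the tree keeps 3 of 4 edges
print(sorted(map(sorted, mst.edges())))
```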

In [ ]:
distance_2011 = np.sqrt(2 * (1 - return_correlation_2011))
distance_2012 = np.sqrt(2 * (1 - return_correlation_2012))
distance_2013 = np.sqrt(2 * (1 - return_correlation_2013))
distance_2014 = np.sqrt(2 * (1 - return_correlation_2014))
distance_2015 = np.sqrt(2 * (1 - return_correlation_2015))
distance_2016 = np.sqrt(2 * (1 - return_correlation_2016))
distance_2017 = np.sqrt(2 * (1 - return_correlation_2017))
distance_2018 = np.sqrt(2 * (1 - return_correlation_2018))
distance_2019 = np.sqrt(2 * (1 - return_correlation_2019))
distance_2020 = np.sqrt(2 * (1 - return_correlation_2020))

Before the construction of the MST graph, the correlation coefficient is converted into a distance.
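
The conversion is d = sqrt(2 * (1 - ρ)), which maps correlations in [-1, 1] to distances in [0, 2]: perfectly correlated stocks sit at distance 0, uncorrelated stocks at √2, and perfectly anti-correlated stocks at distance 2. A quick check (the helper name is ours, not the notebook's):

```python
import numpy as np

def corr_to_distance(rho):
    """Map a correlation coefficient to the metric distance used for the MST."""
    return np.sqrt(2 * (1 - rho))

print(corr_to_distance(1.0), corr_to_distance(0.0), corr_to_distance(-1.0))
# 0.0  ~1.4142  2.0
```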

In [ ]:
distance_2011_graph = nx.Graph(distance_2011)
distance_2012_graph = nx.Graph(distance_2012)
distance_2013_graph = nx.Graph(distance_2013)
distance_2014_graph = nx.Graph(distance_2014)
distance_2015_graph = nx.Graph(distance_2015)
distance_2016_graph = nx.Graph(distance_2016)
distance_2017_graph = nx.Graph(distance_2017)
distance_2018_graph = nx.Graph(distance_2018)
distance_2019_graph = nx.Graph(distance_2019)
distance_2020_graph = nx.Graph(distance_2020)
In [ ]:
graph_2011_filtered = nx.minimum_spanning_tree(distance_2011_graph)
graph_2012_filtered = nx.minimum_spanning_tree(distance_2012_graph)
graph_2013_filtered = nx.minimum_spanning_tree(distance_2013_graph)
graph_2014_filtered = nx.minimum_spanning_tree(distance_2014_graph)
graph_2015_filtered = nx.minimum_spanning_tree(distance_2015_graph)
graph_2016_filtered = nx.minimum_spanning_tree(distance_2016_graph)
graph_2017_filtered = nx.minimum_spanning_tree(distance_2017_graph)
graph_2018_filtered = nx.minimum_spanning_tree(distance_2018_graph)
graph_2019_filtered = nx.minimum_spanning_tree(distance_2019_graph)
graph_2020_filtered = nx.minimum_spanning_tree(distance_2020_graph)

We choose the MST method to filter out the network graph in each window so as to eliminate the redundancies and noise, and still maintain significant links.

In [ ]:
figure, axes = plt.subplots(10, 1, figsize=(24, 120))
nx.draw_networkx(graph_2011_filtered, with_labels=False, ax=axes[0])
nx.draw_networkx(graph_2012_filtered, with_labels=False, ax=axes[1])
nx.draw_networkx(graph_2013_filtered, with_labels=False, ax=axes[2])
nx.draw_networkx(graph_2014_filtered, with_labels=False, ax=axes[3])
nx.draw_networkx(graph_2015_filtered, with_labels=False, ax=axes[4])
nx.draw_networkx(graph_2016_filtered, with_labels=False, ax=axes[5])
nx.draw_networkx(graph_2017_filtered, with_labels=False, ax=axes[6])
nx.draw_networkx(graph_2018_filtered, with_labels=False, ax=axes[7])
nx.draw_networkx(graph_2019_filtered, with_labels=False, ax=axes[8])
nx.draw_networkx(graph_2020_filtered, with_labels=False, ax=axes[9])

On plotting the graphs, we see that the network looks different every year; no two yearly graphs look very similar.

Computing Graph Statistics over Time¶

In [ ]:
average_shortest_path_length = []
year = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]

for graph in [graph_2011_filtered, graph_2012_filtered, graph_2013_filtered, graph_2014_filtered, graph_2015_filtered,
             graph_2016_filtered, graph_2017_filtered, graph_2018_filtered, graph_2019_filtered, graph_2020_filtered]:
    average_shortest_path_length.append(nx.average_shortest_path_length(graph))
In [ ]:
figure = plt.figure(figsize=(22, 8))
sns.lineplot(x='year', y='average_shortest_path_length',
             data=pd.DataFrame({'year': year, 'average_shortest_path_length': average_shortest_path_length}));

From the above plot we can see that the average shortest path length was relatively stable until 2015, increased significantly in 2016 and 2017, decreased in 2018, and increased again in 2020. This may be correlated with broader financial conditions in those years.

Portfolio Construction¶

In [ ]:
log_returns_2011_till_2020 = np.log(price_data_cleaned.shift(1)) - np.log(price_data_cleaned)
return_correlation_2011_till_2020 = log_returns_2011_till_2020.corr()
In [ ]:
figure = plt.figure(figsize=(24, 8))
sns.heatmap(return_correlation_2011_till_2020);
In [ ]:
distance_2011_till_2020 = np.sqrt(2 * (1 - return_correlation_2011_till_2020))
distance_2011_till_2020_graph = nx.Graph(distance_2011_till_2020)
distance_2011_till_2020_graph_filtered = nx.minimum_spanning_tree(distance_2011_till_2020_graph)
In [ ]:
figure = plt.figure(figsize=(24, 8))
nx.draw_kamada_kawai(distance_2011_till_2020_graph_filtered, with_labels=False)
In [ ]:
degree_centrality = nx.degree_centrality(distance_2011_till_2020_graph_filtered)
closeness_centrality = nx.closeness_centrality(distance_2011_till_2020_graph_filtered)
betweenness_centrality = nx.betweenness_centrality(distance_2011_till_2020_graph_filtered)
eigenvector_centrality = nx.eigenvector_centrality_numpy(distance_2011_till_2020_graph_filtered)
In [ ]:
keys = []
values = []

for key, value in degree_centrality.items():
    keys.append(key)
    values.append(value)

dc_data = pd.DataFrame({'stocks': keys, 'degree_centrality': values}).sort_values('degree_centrality', ascending=False)
px.bar(data_frame=dc_data, x='stocks', y='degree_centrality', template='plotly_dark')

Degree centrality is the simplest centrality measure. It defines the relative significance of a stock by the number of edges incident upon it. Stocks with high scores influence the behavior of the many other stocks directly connected to them.

Based on this measure, HON has the highest number of edges to other stocks and hence the highest degree centrality.

In [ ]:
keys = []
values = []

for key, value in closeness_centrality.items():
    keys.append(key)
    values.append(value)

cc_data = pd.DataFrame({'stocks': keys, 'closeness_centrality': values}).sort_values('closeness_centrality',
                                                                                       ascending=False)
px.bar(data_frame=cc_data, x='stocks', y='closeness_centrality', template='plotly_dark')

Closeness centrality also involves the shortest paths between all pairs of stocks in the network.

A stock's closeness centrality is the reciprocal of its average shortest-path distance to all other stocks reachable from it, so stocks that are close to everything score high.

In [ ]:
keys = []
values = []

for key, value in betweenness_centrality.items():
    keys.append(key)
    values.append(value)

bc_data = pd.DataFrame({'stocks': keys, 'betweenness_centrality': values}).sort_values('betweenness_centrality',
                                                                                       ascending=False)
px.bar(data_frame=bc_data, x='stocks', y='betweenness_centrality', template='plotly_dark')

Betweenness centrality is the sum, over all pairs of stocks, of the fraction of shortest paths between that pair that pass through a given stock. It quantifies a stock's control over information flow in the network.

So, the stock with the highest score plays a significant role in coordinating information flow among stocks.
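
A star graph, purely as a toy illustration, makes the three centrality measures concrete: the hub attains the maximum possible score under each of them, while the leaves lie on no shortest paths at all.

```python
import networkx as nx

g = nx.star_graph(4)   # node 0 is the hub; nodes 1..4 are leaves

print(nx.degree_centrality(g)[0])       # 1.0: the hub touches every other node
print(nx.closeness_centrality(g)[0])    # 1.0: the hub is at distance 1 from everyone
print(nx.betweenness_centrality(g)[0])  # 1.0: every leaf-to-leaf shortest path crosses the hub
print(nx.betweenness_centrality(g)[1])  # 0.0: leaves lie on no shortest paths
```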

Selecting Stocks based on Network Topological Parameters¶

In [ ]:
# we already computed degree centrality above

# we already computed betweenness centrality above

# distance on degree criterion
distance_degree_criteria = {}
node_with_largest_degree_centrality = max(degree_centrality, key=degree_centrality.get)
for node in distance_2011_till_2020_graph_filtered.nodes():
    distance_degree_criteria[node] = nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node,
                                                             node_with_largest_degree_centrality)

# distance on correlation criterion
distance_correlation_criteria = {}
sum_correlation = {}

for node in distance_2011_till_2020_graph_filtered.nodes():
    neighbors = nx.neighbors(distance_2011_till_2020_graph_filtered, node)
    sum_correlation[node] = sum(return_correlation_2011_till_2020[node][neighbor] for neighbor in neighbors)

node_with_highest_correlation = max(sum_correlation, key=sum_correlation.get)

for node in distance_2011_till_2020_graph_filtered.nodes():
    distance_correlation_criteria[node] = nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node,
                                                             node_with_highest_correlation)

# distance on distance criterion
distance_distance_criteria = {}
mean_distance = {}

for node in distance_2011_till_2020_graph_filtered.nodes():
    nodes = list(distance_2011_till_2020_graph_filtered.nodes())
    nodes.remove(node)
    distance_distance = [nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node, ns) for ns in nodes]
    mean_distance[node] = np.mean(distance_distance)

node_with_minimum_mean_distance = min(mean_distance, key=mean_distance.get)

for node in distance_2011_till_2020_graph_filtered.nodes():
    distance_distance_criteria[node] = nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node,
                                                             node_with_minimum_mean_distance)

Distance refers to the shortest path length from a node to the central node of the network.

Here, three definitions of the central node are introduced, to reduce the error that could come from relying on a single method.

Three corresponding distances are therefore described below.

1. Distance on degree criterion (Ddegree): the central node is the one with the largest degree.

2. Distance on correlation criterion (Dcorrelation): the central node is the one with the highest sum of correlation coefficients with its neighbors.

3. Distance on distance criterion (Ddistance): the central node is the one with the lowest mean distance to all other nodes.
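
The three criteria can be sketched on a small hypothetical tree, mirroring the notebook's code above (the uniform 0.5 correlations are made up). On this tree all three criteria agree on the central node:

```python
import numpy as np
import networkx as nx

# Hypothetical tree; assume every edge carries a correlation of 0.5
g = nx.Graph([('A', 'B'), ('B', 'C'), ('B', 'D'), ('D', 'E')])
corr = {frozenset(e): 0.5 for e in g.edges()}

# 1. Degree criterion: the node with the most edges
degrees = dict(g.degree())
deg_center = max(degrees, key=degrees.get)

# 2. Correlation criterion: largest sum of correlations with its neighbours
corr_sum = {n: sum(corr[frozenset((n, m))] for m in g.neighbors(n)) for n in g}
corr_center = max(corr_sum, key=corr_sum.get)

# 3. Distance criterion: smallest mean shortest-path length to all other nodes
mean_dist = {n: np.mean([nx.shortest_path_length(g, n, m) for m in g if m != n])
             for n in g}
dist_center = min(mean_dist, key=mean_dist.get)

print(deg_center, corr_center, dist_center)   # B B B
```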

In [ ]:
node_stats = pd.DataFrame.from_dict(dict(degree_centrality), orient='index')
node_stats.columns = ['degree_centrality']
node_stats['betweenness_centrality'] = betweenness_centrality.values()

node_stats['average_centrality'] = 0.5 * (node_stats['degree_centrality'] + node_stats['betweenness_centrality'])

node_stats['distance_degree_criteria'] = distance_degree_criteria.values()
node_stats['distance_correlation_criteria'] = distance_correlation_criteria.values()
node_stats['distance_distance_criteria'] = distance_distance_criteria.values()
node_stats['average_distance'] = (node_stats['distance_degree_criteria'] + node_stats['distance_correlation_criteria'] +
                                  node_stats['distance_distance_criteria']) / 3
In [ ]:
node_stats.head()
Out[ ]:
degree_centrality betweenness_centrality average_centrality distance_degree_criteria distance_correlation_criteria distance_distance_criteria average_distance
MMM 0.002227 0.000000 0.001114 2 2 7 3.666667
AOS 0.004454 0.022073 0.013264 6 6 5 5.666667
ABT 0.008909 0.056584 0.032746 9 9 8 8.666667
ABMD 0.002227 0.000000 0.001114 10 10 9 9.666667
ACN 0.006682 0.008899 0.007790 16 16 9 13.666667

We use the parameters defined above to select the portfolios.

The nodes with the largest 10% of degree or betweenness centrality are chosen for the central portfolio.

The nodes whose degree equals 1 or whose betweenness centrality equals 0 are chosen for the peripheral portfolio.

Similarly, nodes ranking in the top 10% by distance form the peripheral portfolio, and those in the bottom 10% form the central portfolio.

The central portfolios and peripheral portfolios represent two opposite sides of correlation and agglomeration. Generally speaking, central stocks play a vital role in the market and impose a strong influence on other stocks. On the other hand, the correlations between peripheral stocks are weak and contain much more noise than those of the central stocks.
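
The selection rule can be sketched on ten hypothetical stocks with made-up scores (the notebook's next cells apply the same idea by simply taking the top 15 of each ranking):

```python
import pandas as pd

# Hypothetical per-stock scores
stats = pd.DataFrame({
    'average_centrality': [0.30, 0.25, 0.10, 0.05, 0.04,
                           0.03, 0.02, 0.02, 0.01, 0.01],
    'average_distance':   [4.0, 5.0, 9.0, 12.0, 14.0,
                           15.0, 16.0, 18.0, 20.0, 21.0],
}, index=list('ABCDEFGHIJ'))

n = max(1, int(0.10 * len(stats)))   # top 10% of nodes (at least one)
central = stats.nlargest(n, 'average_centrality').index.tolist()   # most central
peripheral = stats.nlargest(n, 'average_distance').index.tolist()  # most peripheral
print(central, peripheral)   # ['A'] ['J']
```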

In [ ]:
central_stocks = node_stats.sort_values('average_centrality', ascending=False).head(15)
central_portfolio = list(central_stocks.index)
In [ ]:
peripheral_stocks = node_stats.sort_values('average_distance', ascending=False).head(15)
peripheral_portfolio = list(peripheral_stocks.index)
In [ ]:
central_stocks
Out[ ]:
degree_centrality betweenness_centrality average_centrality distance_degree_criteria distance_correlation_criteria distance_distance_criteria average_distance
PRU 0.011136 0.639884 0.325510 7 7 0 4.666667
AMP 0.028953 0.540198 0.284576 5 5 2 4.000000
LNC 0.015590 0.526050 0.270820 6 6 1 4.333333
AME 0.020045 0.517430 0.268737 4 4 3 3.666667
GL 0.017817 0.452504 0.235160 8 8 1 5.666667
PH 0.031180 0.389924 0.210552 2 2 5 3.000000
EMR 0.006682 0.414403 0.210542 3 3 4 3.333333
TFC 0.008909 0.400791 0.204850 10 10 3 7.666667
USB 0.006682 0.391624 0.199153 9 9 2 6.666667
PNC 0.008909 0.353911 0.181410 11 11 4 8.666667
JPM 0.011136 0.351078 0.181107 12 12 5 9.666667
PFG 0.011136 0.345023 0.178079 8 8 1 5.666667
BRK-B 0.008909 0.332216 0.170563 13 13 6 10.666667
HST 0.006682 0.331202 0.168942 9 9 2 6.666667
ADP 0.011136 0.312888 0.162012 14 14 7 11.666667
In [ ]:
peripheral_stocks
Out[ ]:
degree_centrality betweenness_centrality average_centrality distance_degree_criteria distance_correlation_criteria distance_distance_criteria average_distance
CHD 0.002227 0.000000 0.001114 24 24 17 21.666667
CAG 0.002227 0.000000 0.001114 23 23 16 20.666667
CLX 0.004454 0.004454 0.004454 23 23 16 20.666667
SJM 0.002227 0.000000 0.001114 22 22 15 19.666667
K 0.002227 0.000000 0.001114 22 22 15 19.666667
HRL 0.002227 0.000000 0.001114 22 22 15 19.666667
CPB 0.004454 0.004454 0.004454 22 22 15 19.666667
KMB 0.004454 0.008889 0.006672 22 22 15 19.666667
WBA 0.002227 0.000000 0.001114 21 21 14 18.666667
ATO 0.002227 0.000000 0.001114 21 21 14 18.666667
GIS 0.011136 0.022162 0.016649 21 21 14 18.666667
NFLX 0.002227 0.000000 0.001114 21 21 14 18.666667
ABC 0.002227 0.000000 0.001114 21 21 14 18.666667
MO 0.002227 0.000000 0.001114 21 21 14 18.666667
MNST 0.002227 0.000000 0.001114 21 21 14 18.666667

Selecting the Top 15 Stocks for the Central and Peripheral Portfolios¶

In [ ]:
# Coloring nodes by portfolio membership: red = central, green = peripheral, blue = neither
color = []

for node in distance_2011_till_2020_graph_filtered:
    if node in central_portfolio:
        color.append('red')
    elif node in peripheral_portfolio:
        color.append('green')
    else:
        color.append('blue')
In [ ]:
figure = plt.figure(figsize=(24, 8))
nx.draw_kamada_kawai(distance_2011_till_2020_graph_filtered, with_labels=False, node_color=color)
[Figure: Kamada-Kawai layout of the filtered network, with portfolio stocks highlighted]

Here, the red stocks are the central portfolio stocks, and the green ones are the peripheral portfolio stocks.

Performance Evaluation¶

Here we evaluate the two portfolios by comparing the 2021 performance of the Central Portfolio, the Peripheral Portfolio, and the S&P 500 index, to find out which performs best.
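The buy-and-hold simulation used in the cells below can be illustrated on toy data: the basket's first-day price defines one "unit", the invested amount buys `amount / unit` shares of the whole basket, and the portfolio value is the daily basket price times that share count. Tickers and prices here are made up:

```python
import pandas as pd

# Hypothetical daily closing prices for a two-stock basket
prices = pd.DataFrame({
    'AAA': [10.0, 11.0, 12.0],
    'BBB': [30.0, 29.0, 33.0],
}, index=pd.to_datetime(['2021-01-04', '2021-01-05', '2021-01-06']))

amount = 100000
basket = prices.sum(axis=1)       # daily price of one basket "unit": 40, 40, 45
shares = amount / basket.iloc[0]  # 2500 units bought on the first day
value = basket * shares           # portfolio value over time

print(value.round(2).tolist())    # [100000.0, 100000.0, 112500.0]
```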

In [ ]:
# Collecting data for all S&P 500 components for the year 2021:
# %time price_data_2021 = web.DataReader(tickers, 'yahoo', start='2021-01-01', end='2021-12-31')
In [ ]:
# Reading the saved 2021 price data for the S&P 500 components:
price_data_2021 = pd.read_csv('/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Eight_-_Networking_and_Graphical_Models/Case_Studies/Network_Stock_Portfolio_Optimization/snp500_price_data_2021.csv', index_col=[0])
# price_data_2021 = price_data_2021['Adj Close']
# price_data_2021.to_csv('snp500_price_data_2021.csv')
In [ ]:
price_data_2021.head()
Out[ ]:
MMM AOS ABT ABBV ABMD ACN ATVI ADM ADBE ADP ... WYNN XEL XLNX XYL YUM ZBRA ZBH ZION ZTS CEG
Date
2020-12-31 169.412521 53.700413 107.444366 101.195663 324.200012 257.353546 92.402596 49.218819 500.119995 172.915405 ... 112.830002 64.846153 141.770004 100.827858 106.770256 384.329987 153.096832 42.901466 164.329178 NaN
2021-01-04 166.582382 52.818787 107.071472 99.552361 316.730011 252.673630 89.466812 48.691574 485.339996 165.810379 ... 106.900002 63.863796 142.429993 98.747719 104.075439 378.130005 152.172821 42.397789 162.432663 NaN
2021-01-05 166.301315 53.161644 108.396248 100.581787 322.600006 254.112091 90.253006 49.638653 485.690002 165.349136 ... 110.190002 63.241299 144.229996 98.628838 104.085266 380.570007 154.805740 43.069359 163.564590 NaN
2021-01-06 168.831009 54.993446 108.170555 99.712914 321.609985 256.890442 87.575966 51.649975 466.309998 164.770126 ... 110.849998 64.641907 141.220001 102.789139 104.655716 394.820007 159.217133 47.908611 165.967484 NaN
2021-01-07 164.498520 55.669361 109.220566 100.780106 323.559998 259.314148 89.237915 51.191086 477.739990 165.702438 ... 109.750000 63.377476 149.710007 107.454628 103.859055 409.100006 158.273254 49.370266 165.818558 NaN

5 rows × 505 columns

In [ ]:
# Fetching the S&P 500 index level for 2021 from FRED
snp_500_2021 = web.DataReader(['sp500'], 'fred', start='2021-01-01', end='2021-12-31')

In [ ]:
# Removing NA values:
price_data_2021 = price_data_2021.dropna(axis=1)
snp_500_2021 = snp_500_2021.dropna()
In [ ]:
price_data_2021.head()
Out[ ]:
MMM AOS ABT ABBV ABMD ACN ATVI ADM ADBE ADP ... WTW WYNN XEL XLNX XYL YUM ZBRA ZBH ZION ZTS
Date
2020-12-31 169.412521 53.700413 107.444366 101.195663 324.200012 257.353546 92.402596 49.218819 500.119995 172.915405 ... 210.679993 112.830002 64.846153 141.770004 100.827858 106.770256 384.329987 153.096832 42.901466 164.329178
2021-01-04 166.582382 52.818787 107.071472 99.552361 316.730011 252.673630 89.466812 48.691574 485.339996 165.810379 ... 203.699997 106.900002 63.863796 142.429993 98.747719 104.075439 378.130005 152.172821 42.397789 162.432663
2021-01-05 166.301315 53.161644 108.396248 100.581787 322.600006 254.112091 90.253006 49.638653 485.690002 165.349136 ... 202.000000 110.190002 63.241299 144.229996 98.628838 104.085266 380.570007 154.805740 43.069359 163.564590
2021-01-06 168.831009 54.993446 108.170555 99.712914 321.609985 256.890442 87.575966 51.649975 466.309998 164.770126 ... 203.699997 110.849998 64.641907 141.220001 102.789139 104.655716 394.820007 159.217133 47.908611 165.967484
2021-01-07 164.498520 55.669361 109.220566 100.780106 323.559998 259.314148 89.237915 51.191086 477.739990 165.702438 ... 205.250000 109.750000 63.377476 149.710007 107.454628 103.859055 409.100006 158.273254 49.370266 165.818558

5 rows × 503 columns

In [ ]:
# Dropping the 2020-12-31 row so all series start on the first trading day of 2021
price_data_2021 = price_data_2021['2021-01-04':]
In [ ]:
amount = 100000

# Buy-and-hold simulation: the basket's first-day price defines one "unit",
# and the invested amount buys `amount / portfolio_unit` shares of the basket
central_portfolio_value = pd.DataFrame()
for stock in central_portfolio:
    central_portfolio_value[stock] = price_data_2021[stock]

portfolio_unit = central_portfolio_value.sum(axis=1).iloc[0]
share = amount / portfolio_unit
central_portfolio_value = central_portfolio_value.sum(axis=1) * share

# Same simulation for the peripheral portfolio
peripheral_portfolio_value = pd.DataFrame()
for stock in peripheral_portfolio:
    peripheral_portfolio_value[stock] = price_data_2021[stock]

portfolio_unit = peripheral_portfolio_value.sum(axis=1).iloc[0]
share = amount / portfolio_unit
peripheral_portfolio_value = peripheral_portfolio_value.sum(axis=1) * share
In [ ]:
# Simulating the same investment amount in the S&P 500 index
snp_500_2021_value = snp_500_2021 * (amount / snp_500_2021.iloc[0])
In [ ]:
# Combining the three value series into a single DataFrame
# (.copy() so the column assignments do not mutate snp_500_2021_value)
all_portfolios = snp_500_2021_value.copy()
all_portfolios['central_portfolio'] = central_portfolio_value.values
all_portfolios['peripheral_portfolio'] = peripheral_portfolio_value.values
In [ ]:
# all_portfolios = pd.concat([snp_500_2021_value, central_portfolio_value, peripheral_portfolio_value], axis=1)
# all_portfolios.columns = ['snp500', 'central_portfolio', 'peripheral_portfolio']
In [ ]:
all_portfolios.head()
Out[ ]:
sp500 central_portfolio peripheral_portfolio
DATE
2021-01-04 100000.000000 100000.000000 100000.000000
2021-01-05 100708.253955 100426.138652 99911.532803
2021-01-06 101283.288071 104249.598589 99165.855978
2021-01-07 102787.077946 105184.349194 99511.870053
2021-01-08 103351.573372 105127.033059 99586.017687
In [ ]:
figure, ax = plt.subplots(figsize=(16, 8))
snp_500_line = ax.plot(all_portfolios['sp500'], label='S&P 500')
central_portfolio_line = ax.plot(all_portfolios['central_portfolio'], label='Central Portfolio')
peripheral_portfolio_line = ax.plot(all_portfolios['peripheral_portfolio'], label='Peripheral Portfolio')
ax.legend(loc='upper left')
plt.show()
[Figure: 2021 value of the S&P 500, Central Portfolio, and Peripheral Portfolio, each starting from $100,000]

As the plot above shows, the Central Portfolio outperforms the S&P 500 in 2021, while the Peripheral Portfolio underperforms it.

Each portfolio has its own characteristics under different market conditions.

Generally, central portfolio stocks perform better in stable market conditions, whereas peripheral portfolio stocks hold up better during market crises: peripheral stocks are only weakly correlated with the rest of the network, so they are less affected by movements in the other stocks.

We can therefore use this network analysis to rebalance our stock portfolio as market conditions change.
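To make the comparison quantitative, one simple sketch is to compute the total return and annualized volatility of each value series. The column names match `all_portfolios` above, but the numbers here are made up for illustration:

```python
import pandas as pd
import numpy as np

# Hypothetical portfolio-value series shaped like `all_portfolios`
values = pd.DataFrame({
    'sp500': [100000, 100700, 101300, 102800],
    'central_portfolio': [100000, 100400, 104200, 105200],
    'peripheral_portfolio': [100000, 99900, 99200, 99500],
}, dtype=float)

# Overall gain/loss over the period, per portfolio
total_return = values.iloc[-1] / values.iloc[0] - 1

# Annualized volatility of daily returns (252 trading days per year)
daily_returns = values.pct_change().dropna()
ann_vol = daily_returns.std() * np.sqrt(252)

print(total_return.round(4).to_dict())
```

A fuller evaluation would also look at risk-adjusted measures such as the Sharpe ratio, but total return and volatility already separate the central and peripheral baskets.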

In [3]:
# Convert notebook to html
!jupyter nbconvert --to html "/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Eight_-_Networking_and_Graphical_Models/Case_Studies/Network_Stock_Portfolio_Optimization/Network_Stock_Portfolio_Optimization.ipynb"
[NbConvertApp] Converting notebook /content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Eight_-_Networking_and_Graphical_Models/Case_Studies/Network_Stock_Portfolio_Optimization/Network_Stock_Portfolio_Optimization.ipynb to html
[NbConvertApp] WARNING | Alternative text is missing on 8 image(s).
[NbConvertApp] Writing 3200408 bytes to /content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_Eight_-_Networking_and_Graphical_Models/Case_Studies/Network_Stock_Portfolio_Optimization/Network_Stock_Portfolio_Optimization.html