my capstone ! This was fun. In case you take some time to review this: Thanks in advance.
Please review the code below. You can find my finished presentation at mikesMeanVarOpt - Google Slides
import yfinance as yf
# OF NOTE: End date is non-inclusive. Also, random securities become unavailable intermittently.
# Watch in console for ommissions.
start_date = '2009-06-30'
end_date = '2023-04-04'
ticker = 'AAPL MSFT CRM HAL XOM AER CAH UPS PNC WYNN F'
result_data = yf.download(ticker, start_date, end_date)
result_data['Date'] = result_data.index
result_data = result_data[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
result_data.reset_index(drop=True, inplace=True)
#result_data.to_csv('rawProjectTickers.csv')
import pandas as pd
# Read csv procuced by yfDownloadTrial_ii.py
result_data = pd.read_csv('rawProjectTickers.csv')
# Dropping all columns except Adj Close, Date, and 'Unnamed 0'
result_data = result_data.drop(['Close','Close.1','Close.2','Close.3','Close.4','Close.5','Close.6','Close.7',\
'Close.8','Close.9','Close.10'], axis=1).reset_index(drop=True)
result_data = result_data.drop(columns=result_data.columns[result_data.columns.str.contains('Open')])
result_data = result_data.drop(columns=result_data.columns[result_data.columns.str.contains('High')])
result_data = result_data.drop(columns=result_data.columns[result_data.columns.str.contains('Low')])
result_data = result_data.drop(columns=result_data.columns[result_data.columns.str.contains('Volume')])
# Convert times to datetime objects.
result_data['Date'] = pd.to_datetime(result_data['Date'], format='%Y-%m-%d')
# Transform all NaNs/NaTs to zero. The next line changes 'Unnamed' column to ints.
# This wasn't necessary, but it helped me see what the 'real' indexes were meant to look like.
result_data = result_data.fillna(value=0)
result_data['Unnamed: 0'] = result_data['Unnamed: 0'].astype('int')
# This was a manual data massage to recreate the ticker labels on the2nd line.
result_data = result_data.rename(columns={'Adj Close':'AAPL', 'Adj Close.1':'AER', \
'Adj Close.2':'CAH', 'Adj Close.3':'CRM', 'Adj Close.4':'F', 'Adj Close.5':'HAL', \
'Adj Close.6':'MSFT', 'Adj Close.7':'PNC', 'Adj Close.8':'UPS', 'Adj Close.9':'WYNN', \
'Adj Close.10':'XOM'})
# Dropped the top row of the dataframe.
result_data = result_data.drop(0, axis=0)
# Drop the unnamed column to establish the new indices, disallow adding the
# old index column, and insure the result is a new dataframe.
result_data = result_data.drop('Unnamed: 0', axis=1).reset_index(drop=True)
# File to be read in and used in rowManipAndPrep_ii.py
#result_data.to_csv('modifiedProjectTickers.csv')
import pandas as pd
# Read csv produced by workStrictlyWithDf_ii.py
result_data = pd.read_csv('modifiedProjectTickers.csv')
result_data = result_data.drop('Unnamed: 0', axis=1)
# Creating a dictionary to name headers and then transforming it into a dataframe.
result_data2 = {'Date': [], 'AAPL': [], 'AER': [], 'CAH': [], 'CRM': [], 'F': [], 'HAL': [], \
'MSFT':[], 'PNC': [], 'UPS': [], 'WYNN': [], 'XOM': []}
result_data2 = pd.DataFrame(result_data2)
# Function to append a row.
def add_a_row(i_for_modified, i_for_original):
result_data2.loc[i_for_modified]= result_data.loc[i_for_original]
return
# Iterating through the csv:
# Grab the date for the current index. [5:7] is the two-digit month.
# Test to only examine the date for a Quarterly month.
# Make sure I don't go past the final element when comparing to the next one.
# Examine current index and the next one to find the last trading day of the month.
# Invoke the function above and manage the index for the new df.
i_for_new_df = 0
for i in range(len(result_data)):
examine_date = result_data.loc[i,'Date']
if (examine_date[5:7] == '03') | (examine_date[5:7] == '06') | \
(examine_date[5:7] == '09') | (examine_date[5:7] == '12'):
if i+1 in range(len(result_data)):
examine_date_plus_1 = result_data.loc[i+1,'Date']
if examine_date[5:7] != examine_date_plus_1[5:7]:
add_a_row(i_for_new_df, i)
i_for_new_df += 1
# Write the new (fully prepared) csv file.
#result_data2.to_csv('adjClose.csv')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from rf import return_portfolios, optimal_portfolio
# Read csv produced by workStrictlyWithDf_ii.py
result_data = pd.read_csv('adjClose.csv', index_col=1)#.reset_index(drop=True)
result_data = result_data.drop('Unnamed: 0', axis=1)
my_var = result_data.var()#.reset_index()
#my_var.to_csv('outAllVariance.csv') # This can be done as a Series or as a Dataframe (1 dimension).
my_cov = result_data.cov()#.reset_index() # BUT this is based on Adj Close, NOT on
# quarterly returns!!
# Calculate quarterly returns, and expected_returns
selected = list(result_data.columns[:]) # Each ticker
quarterly_returns = result_data[selected].pct_change()
expected_returns = quarterly_returns.mean()
#expected_returns.to_csv('outExpectedReturns.csv')
quarterly_cov = quarterly_returns.cov() # THIS is cov based on quarterly returns!
#quarterly_cov.to_csv('outQuarRetCovariance.csv')
correlQR = quarterly_returns.corr()
#correlQR.to_csv('correlationBasedOnQuarRets.csv')
single_asset_std=np.sqrt(np.diagonal(quarterly_cov)) # All the standard
# Call to rf.py functions
randomized_weights = return_portfolios(expected_returns, quarterly_cov)
#randomized_weights.to_csv('fullListOfRandWts.csv')
weights, returns, risks = optimal_portfolio(quarterly_returns[1:]) # IS THIS [1:] OMITTING ANYTHING?
# Initial plot, followed by the risk/return efficient frontier.
randomized_weights.plot.scatter(x='Volatility', y='Returns', figsize=(10,8), grid=True)
plt.plot(risks, returns, color='aquamarine', marker='v')
# Plot the individual assets.
plt.scatter(single_asset_std,expected_returns,marker='X',color='red',s=100)
plt.xlabel('Volatility (Std. Deviation)')
plt.ylabel('Expected Returns')
plt.title('Efficient Frontier')
plt.show()