Hi guys!
Here’s my take on the capstone project for the Analyze Financial Data with Python skill path. Feel free to have a look. I appreciate any comments or recommendations for improvement.
Thanks!
Hi guys!
Here’s my take on the capstone project for the Analyze Financial Data with Python skill path. Feel free to have a look. I appreciate any comments or recommendations for improvement.
Thanks!
Here’s the code i used for this assignment:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import cvxopt as opt
import matplotlib.pyplot as plt
from cvxopt import blas, solvers
def return_portfolios(expected_returns, cov_matrix):
port_returns = []
port_volatility = []
stock_weights = []
selected = (expected_returns.axes)[0]
num_assets = len(selected)
num_portfolios = 5000
for single_portfolio in range(num_portfolios):
#get stock portfolio weights by dividing random number assigned to each stock with the sum of random numbers
weights = np.random.random(num_assets)
weights /= np.sum(weights)
returns = np.dot(weights, expected_returns)
volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
port_returns.append(returns)
port_volatility.append(volatility)
stock_weights.append(weights)
portfolio = {'Returns': port_returns,
'Volatility': port_volatility}
for counter,symbol in enumerate(selected):
portfolio[symbol +' Weight'] = [Weight[counter] for Weight in stock_weights]
df = pd.DataFrame(portfolio)
column_order = ['Returns', 'Volatility'] + [stock+' Weight' for stock in selected]
df = df[column_order]
return df
def optimal_portfolio(returns):
n = returns.shape[1]
returns = np.transpose(returns.values)
N = 100
mus = [10**(5.0 * t/N - 1.0) for t in range(N)]
# Convert to cvxopt matrices
S = opt.matrix(np.cov(returns))
pbar = opt.matrix(np.mean(returns, axis=1))
# Create constraint matrices
G = -opt.matrix(np.eye(n)) # negative n x n identity matrix
h = opt.matrix(0.0, (n ,1))
A = opt.matrix(1.0, (1, n))
b = opt.matrix(1.0)
# Calculate efficient frontier weights using quadratic programming
portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] for mu in mus]
## CALCULATE RISKS AND RETURNS FOR FRONTIER
returns = [blas.dot(pbar, x) for x in portfolios]
risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios]
## CALCULATE THE 2ND DEGREE POLYNOMIAL OF THE FRONTIER CURVE
m1 = np.polyfit(returns, risks, 2)
x1 = np.sqrt(m1[2] / m1[0])
# CALCULATE THE OPTIMAL PORTFOLIO
wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
return np.asarray(wt), returns, risks
startdate = '2015/5/1'
enddate = '2020/5/1'
symbol = ['DUK', 'MSFT', 'AMD', 'ABBV', 'BABA']
stocks = ['Duke Energy', 'Microsoft', 'Advanced Micro Devices Inc.', 'Abbvie', 'Alibaba']
#['Adj Close']
adjclose = web.DataReader(symbol[0], start=startdate, end=enddate, data_source='yahoo')['Adj Close']
for i in range(1,len(symbol)):
series = web.DataReader(symbol[i] , start=startdate, end=enddate, data_source='yahoo')['Adj Close']
adjclose = pd.concat([adjclose, series], axis = 1)
adjclose.columns = symbol
print(adjclose)
plt.figure(figsize = (25, 25))
for i in range(len(symbol)):
plt.subplot(3, 2, i + 1)
adjclose[symbol[i]].plot()
plt.xlabel('Date')
plt.ylabel('Adj Close')
plt.title(str(symbol[i]), fontsize = 20, fontweight='bold')
plt.savefig('stockperformance.jpeg')
plt.show()
simpleror = adjclose.pct_change()
print(simpleror)
simple_returns = simpleror.mean()
print(simple_returns)
plt.figure()
plt.bar(symbol, simple_returns)
plt.title('Average Daily Returns of Stocks')
plt.xlabel('Stock')
plt.ylabel('Simple Returns')
plt.savefig('dailyreturns.jpeg')
plt.show()
simpleror_var = simpleror.var()
print(simpleror_var)
print('The riskiest stock in the portfolio, exhibiting a variance of {} in daily simple returns over the past 5 years is {}.'.format(max(simpleror_var), simpleror_var.idxmax()))
plt.figure()
plt.bar(symbol, simpleror_var)
plt.title('Variance of Daily Stock Returns')
plt.xlabel('Stock')
plt.ylabel('Variance')
plt.savefig('variance.jpeg')
plt.show()
simpleror_std = simpleror.std()
print(simpleror_std)
simpleror_corr = simpleror.corr()
#simpleror_corr = simpleror_corr.where(simpleror_corr < 1.00)
print(simpleror_corr)
a, b = simpleror_corr.stack().idxmin()
c, d = simpleror_corr.where(simpleror_corr < 1.00).stack().idxmax()
print('{}/{} exhibits the lowest correlation in daily simple returns.'.format(a, b))
print('{}/{} exhibits the greatest correlation in daily simple returns.'.format(c, d))
Return a set of portfolio options with volatility and annualized returns
portfolio_options = return_portfolios(simple_returns, simpleror.cov())
print(portfolio_options)
weights, returns, risks = optimal_portfolio(simpleror[1:])
print(weights.shape)
simp_returns = simple_returns.to_numpy().reshape(1,5)
print(simp_returns.shape)
opt_returns = np.asscalar(np.dot(simp_returns, weights))
print(opt_returns)
plt.figure(figsize = (15,15))
portfolio_options.plot.scatter(x='Volatility', y='Returns', fontsize=12)
plt.title('Efficient Frontier', fontsize = 15)
plt.xlabel('Volatility')
plt.ylabel('Daily Returns')
plt.axis([min(portfolio_options['Volatility']) * 0.5, max((portfolio_options['Volatility'])) * 1.5, min(portfolio_options['Returns']) * 0.5, max((portfolio_options['Returns'])) * 1.5])
#Place markers for single stock portfolios
std = np.sqrt(np.diagonal(simpleror.cov()))
plt.scatter(std, simple_returns, marker = 'x', color = 'red', s = 200)
plt.plot(risks, returns, color = 'purple')
plt.grid(color='grey', linestyle=':', linewidth=2)
plt.savefig('efficientfrontier.jpeg')
plt.show()
returns_tolerance = np.array([0.0015, 0.0020])
volatility_tolerance = np.array([0.017, 0.022])
df_lower_volatility = portfolio_options[(portfolio_options.Returns > returns_tolerance[0]) & (portfolio_options.Volatility < volatility_tolerance[0])]
df_higher_returns = portfolio_options[(portfolio_options.Returns > returns_tolerance[1]) & (portfolio_options.Volatility < volatility_tolerance[1])]
print(df_lower_volatility)
print(df_higher_returns)
print(portfolio_options.iloc[4384])
print(portfolio_options.iloc[4581])
(5, 1)
(1, 5)
0.0032835199914771087
plt.figure(figsize = (15,15))
portfolio_options.plot.scatter(x='Volatility', y='Returns', fontsize=12)
plt.title('Efficient Frontier', fontsize = 15)
plt.xlabel('Volatility')
plt.ylabel('Daily Returns')
plt.axis([min(portfolio_options['Volatility']) * 0.5, max((portfolio_options['Volatility'])) * 1.5, min(portfolio_options['Returns']) * 0.5, max((portfolio_options['Returns'])) * 1.5])
#Place markers for single stock portfolios
std = np.sqrt(np.diagonal(simpleror.cov()))
plt.scatter(std, simple_returns, marker = 'x', color = 'red', s = 200)
plt.plot(risks, returns, color = 'purple')
plt.grid(color='grey', linestyle=':', linewidth=2)
plt.savefig('efficientfrontier.jpeg')
plt.show()
<Figure size 1080x1080 with 0 Axes>
returns_tolerance = np.array([0.0015, 0.0020])
volatility_tolerance = np.array([0.017, 0.022])
df_lower_volatility = portfolio_options[(portfolio_options.Returns > returns_tolerance[0]) & (portfolio_options.Volatility < volatility_tolerance[0])]
df_higher_returns = portfolio_options[(portfolio_options.Returns > returns_tolerance[1]) & (portfolio_options.Volatility < volatility_tolerance[1])]
print(df_lower_volatility)
print(df_higher_returns)
Returns Volatility DUK Weight MSFT Weight AMD Weight ABBV Weight \
1464 0.001501 0.016963 0.232939 0.383125 0.253342 0.004610
3082 0.001503 0.016983 0.131725 0.431681 0.227222 0.034093
4384 0.001514 0.016963 0.085769 0.543129 0.210829 0.078017
BABA Weight
1464 0.125985
3082 0.175278
4384 0.082256
Returns Volatility DUK Weight MSFT Weight AMD Weight ABBV Weight \
4581 0.002006 0.021957 0.011947 0.507696 0.399157 0.039691
BABA Weight
4581 0.041509
print(portfolio_options.iloc[4384])
print(portfolio_options.iloc[4581])
Returns 0.001514
Volatility 0.016963
DUK Weight 0.085769
MSFT Weight 0.543129
AMD Weight 0.210829
ABBV Weight 0.078017
BABA Weight 0.082256
Name: 4384, dtype: float64
Returns 0.002006
Volatility 0.021957
DUK Weight 0.011947
MSFT Weight 0.507696
AMD Weight 0.399157
ABBV Weight 0.039691
BABA Weight 0.041509
Name: 4581, dtype: float64
Did you copy the return_portfolios and optimal_portfolio functions directly from the portfolio optimization section of the course?
I am currently trying to do this project and find myself with no idea how to do the final section:
“Perform a mean-variance portfolio optimization that shows the efficient frontier for the group of stocks you have selected. If the investor is less risky, how should she allocate her funds across the portfolio? If she is more risky, how should she allocate her funds? Indicate multiple investment options at different risk levels and specify the returns.”
I’m sure I could copy and paste the functions from the course into my code and use them but that feels like I’m skipping a step.
How did you know how/why to use cvxopt?
Hi thanks for your reply!
The cvxopt package is used in my optimal_portfolio() function, which was derived from the source code in page 9/12 of the lesson “Mean-Variance Portfolio Optimization”.
However, this might not be that critical to my analysis outcome, but was more for a visualization aspect of my efficient frontier.
From the graph obtained, by adding gridlines, I set the limits of my intended returns (0.0015 and 0.0020), and their corresponding volatility (just a rough gauge) from the efficient frontier plot.
returns_tolerance = np.array([0.0015, 0.0020])
volatility_tolerance = np.array([0.017, 0.022])
Subsequently, I extracted the list of portfolios that satisfy my preset conditions for returns and volatility from the list of portfolio options (portfolio_options), and select my final portfolios from what was extracted, with the option of a lower return but lower volatility portfolio (df_lower_volatility) and the option of a higher return but higher volatility and therefore riskier portfolio (df_higher_returns).
df_lower_volatility = portfolio_options[(portfolio_options.Returns > returns_tolerance[0]) & (portfolio_options.Volatility < volatility_tolerance[0])]
df_higher_returns = portfolio_options[(portfolio_options.Returns > returns_tolerance[1]) & (portfolio_options.Volatility < volatility_tolerance[1])]
print(df_lower_volatility)
print(df_higher_returns)
Hope this helps!
I think the highest return “portfolio” will just be 100% of the previously best performing stock. If you add any lower-performing stock, the expected returns should go down.