# I was doing this project and noticed that, unlike the other projects, there is no video walkthrough available for this one, so I thought this might help someone who is having trouble completing it.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Scrape the cacao ratings page, then explore the data:
#   1. histogram of all ratings,
#   2. the ten companies with the highest average rating,
#   3. scatter plot of cocoa percentage vs. rating with a linear trend line.

CACAO_URL = "https://s3.amazonaws.com/codecademy-content/courses/beautifulsoup/cacao/index.html"

# Fail fast on a hung connection or an HTTP error instead of silently
# parsing an error page into empty lists.
webpage = requests.get(CACAO_URL, timeout=30)
webpage.raise_for_status()
soup = BeautifulSoup(webpage.content, "html.parser")

# --- Ratings -----------------------------------------------------------
# Every element with class "Rating"; the first match is skipped
# (presumably the table's header cell -- it is not a number).
ratings_data = soup.find_all(attrs={"class": "Rating"})
ratings = [float(cell.string) for cell in ratings_data[1:]]
print(ratings)

plt.hist(ratings)
plt.show()

# --- Companies ---------------------------------------------------------
# Same layout as the ratings: skip the first match, keep the text.
company_data = soup.select(".Company")
companies = [cell.string for cell in company_data[1:]]
print(companies)

# Named `chocolate_data` rather than `dict` so the builtin is not shadowed.
chocolate_data = {
    "Company": companies,
    "Rating": ratings,
}
df = pd.DataFrame.from_dict(chocolate_data)
# A bare `df.head()` only displays in a notebook; print it so the preview
# also shows up when this runs as a script.
print(df.head())

# Average rating per company, then the ten best averages.
avg_ratings = df.groupby("Company").Rating.mean()
top_ten = avg_ratings.nlargest(10)
print(top_ten)

# --- Cocoa percentage --------------------------------------------------
# Cell text is stripped of its trailing "%" and truncated to a whole
# number via float -> int (so fractional percentages do not raise).
cocoa_data = soup.select(".CocoaPercent")
cocoa_pcts = [int(float(cell.string.rstrip("%"))) for cell in cocoa_data[1:]]
print(cocoa_pcts)

df["CocoaPercentage"] = cocoa_pcts
print(df.head())

# Clear the axes left over from the histogram before drawing the scatter.
plt.cla()
plt.scatter(df.CocoaPercentage, df.Rating)

# Degree-1 least-squares fit, drawn as a red dashed line over the points.
z = np.polyfit(df.CocoaPercentage, df.Rating, 1)
line_function = np.poly1d(z)
plt.plot(df.CocoaPercentage, line_function(df.CocoaPercentage), "r--")
plt.show()