Here is my code; any comments are appreciated. It was written in a Jupyter notebook, but GitHub is still confusing me…
Jean
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the Golden Ticket Award winners for steel and wooden roller coasters.
# NOTE: these are absolute paths on the author's machine; point them at the
# local copies of the CSV files before running elsewhere.
winners_steel = pd.read_csv("/users/jeanlawlis/downloads/roller_coaster_starting/Golden_Ticket_Award_Winners_Steel.csv")
winners_wood = pd.read_csv("/users/jeanlawlis/downloads/roller_coaster_starting/Golden_Ticket_Award_Winners_Wood.csv")

# Sort both tables by coaster name. sort_values returns a new frame and keeps
# the original row labels, so label-based lookups still work afterwards.
sorted_wood = winners_wood.sort_values("Name")
sorted_steel = winners_steel.sort_values("Name")

# Record each wooden coaster's position in the name-sorted order.
sorted_wood["index"] = range(len(sorted_wood))

# For easier management, split the wooden-coaster table into the columns the
# author treats as numeric and the descriptive (string) columns. Both copies
# keep the same row labels, so a boolean mask built on one selects the
# matching rows of the other. (No conversion to NumPy arrays is done here.)
coaster_data_wood = sorted_wood[["Rank", "Year Built", "Points", "Year of Rank"]].copy()
coaster_descr_wood = sorted_wood[["Name", "Park", "Location", "Supplier"]].copy()
# 2. Create a function to plot rankings over time for one roller coaster
def plot_ranking_over_time(name, parkname):
    """Plot the yearly rank of one wooden roller coaster as a line.

    Rows are looked up in the module-level ``coaster_descr_wood`` and
    ``coaster_data_wood`` frames, which share the same row labels.

    Args:
        name: Coaster name, matched exactly against the "Name" column.
        parkname: Park name, matched exactly against the "Park" column.

    Returns:
        None. The chart is shown with ``plt.show()``; if no row matches,
        a message is printed and nothing is drawn.
    """
    print("\n Ranking over time of the ", name, "Roller Coaster at ", parkname)

    # Select every row for this coaster in one vectorised comparison instead
    # of walking the frame label by label.
    is_match = (coaster_descr_wood["Name"] == name) & (coaster_descr_wood["Park"] == parkname)
    # Order by year so the line is drawn left to right in time.
    history = coaster_data_wood.loc[is_match].sort_values("Year of Rank")

    if history.empty:
        print("No ranking data found for", name, "at", parkname)
        return

    plt.clf()
    # The function name and the printed title promise the ranking, so plot
    # the "Rank" column (the earlier version plotted "Points").
    plt.plot(history["Year of Rank"], history["Rank"], marker="o")
    # Rank 1 is the best result, so put it at the top of the chart.
    plt.gca().invert_yaxis()
    plt.xlabel("Year of Rank")
    plt.ylabel("Rank")
    plt.show()
plot_ranking_over_time("Boulder Dash", "Lake Compounce")

# 3. Create a plot of El Toro ranking over time
plot_ranking_over_time("El Toro", "Six Flags Great Adventure")

# Create a plot of El Toro and Boulder Dash roller coasters
# (heading only: no combined plot is drawn in this section)
# 4. Create a function to plot top n rankings over time.
# Start by sorting by rank, then keep the first n rows and return them as a new DataFrame.
def top_rankings(coasters, n):
    """Return the ``n`` best-ranked rows of a coaster table.

    Args:
        coasters: DataFrame with a "Rank" column (lower is better).
        n: Number of rows to keep.

    Returns:
        A new DataFrame holding the first ``n`` rows of ``coasters`` after
        sorting by "Rank" in ascending order. The input frame is not
        modified. If ``n`` exceeds the number of rows, all rows are returned.
    """
    # A stable sort keeps rows with equal rank in their incoming order, so
    # the selection is deterministic when ranks repeat.
    sorted_coasters = coasters.sort_values("Rank", kind="mergesort")
    highest_n_rankings = sorted_coasters.iloc[:n]
    return highest_n_rankings
# Bar chart of points by year for the five best-ranked wooden coaster rows.
top = top_rankings(sorted_wood, 5)
print("\n", "Top Rankings of Wood Roller Coasters")
plt.clf()  # start from an empty figure, as the steel chart below does
plt.bar(top["Year of Rank"], top["Points"])
plt.show()

# Same chart for the six best-ranked steel coaster rows.
top = top_rankings(sorted_steel, 6)
print("\n", "Top Rankings of Steel Roller Coasters")
plt.clf()
plt.bar(top["Year of Rank"], top["Points"])
plt.show()
# 5. Load roller coaster data
# Load the general roller coaster table from roller_coasters.csv.
# NOTE(review): absolute, user-specific path — presumably needs adjusting on
# another machine.
roller_coaster_data = pd.read_csv("/users/jeanlawlis/downloads/roller_coaster_starting/roller_coasters.csv")
# Debug aid: uncomment to inspect the column dtypes.
#print(roller_coaster_data.dtypes)
# 6. Create a function to plot a histogram of column values
def histogram(coaster_data):
    """Show a histogram of one column of values.

    Args:
        coaster_data: One-dimensional collection of numbers, e.g. a single
            DataFrame column.

    Returns:
        None. The chart is shown with ``plt.show()``.
    """
    # Drop missing values up front so they cannot influence the bin range.
    values = pd.Series(coaster_data).dropna()
    plt.clf()
    plt.hist(values)
    plt.show()
# Create histogram of roller coaster speed
print("\n Coaster Speed Distribution ")
histogram(roller_coaster_data["speed"])

# Create histogram of roller coaster length
print("\n Coaster Length Distribution" )
histogram(roller_coaster_data["length"])

# Create histogram of roller coaster number of inversions
print("\n Number of Inversions ")
histogram(roller_coaster_data["num_inversions"])

# Create a function to plot histogram of height values
# Create a histogram of roller coaster height
print("\n Roller Coaster Height ")
histogram(roller_coaster_data["height"])
# 7. Create a function to plot inversions by coaster at a park
def inversions_by_coaster_by_park(park):
    """Draw a bar chart of the number of inversions of each coaster at a park.

    Reads the module-level ``roller_coaster_data`` frame.

    Args:
        park: Park name, matched exactly against the "park" column.

    Returns:
        None. The chart is shown with ``plt.show()``; if the park has no
        rows, a message is printed and nothing is drawn.
    """
    at_park = roller_coaster_data[roller_coaster_data["park"] == park]
    rollers = at_park["name"].tolist()
    # A missing inversion count is drawn as a zero-height bar.
    inversions = at_park["num_inversions"].fillna(0).tolist()

    print("Park Name: ", park)
    print("Rollers : ", rollers)
    if not rollers:
        print("No roller coasters found for this park.")
        return

    # plt.subplots() opens its own figure, so no plt.clf() beforehand
    # (that would leave an empty extra figure behind).
    _, ax = plt.subplots()
    positions = list(range(len(rollers)))
    # One bar per coaster: a bar chart, not a histogram, is what lines up
    # with the per-coaster tick labels set below.
    ax.bar(positions, inversions)
    ax.set_title('Inversions Per Coaster')
    ax.set_xlabel("coaster")
    ax.set_ylabel("number of inversions")
    ax.set_xticks(positions)
    ax.set_xticklabels(rollers, rotation=70)
    # Integer ticks from 0 to at least 6 (the original fixed range), extended
    # when a coaster has more inversions than that.
    highest_tick = max(6, int(max(inversions)))
    ax.set_yticks(list(range(highest_tick + 1)))
    plt.show()
# sort_values returns a new DataFrame rather than sorting in place, so the
# result has to be kept. mergesort is stable, so coasters within one park
# stay in their original relative order.
roller_coaster_data = roller_coaster_data.sort_values("park", kind="mergesort")
#print(roller_coaster_data.head(30))

# Create barplot of inversions by roller coasters
inversions_by_coaster_by_park("Disneyland Park")
# 8. Create a function to plot a pie chart of status.operating
def pie(operational, trait):
    """Show a pie chart of how often each value of a column occurs.

    Args:
        operational: DataFrame holding the data.
        trait: Name of the column whose distinct values become the wedges.

    Returns:
        None. The chart is shown with ``plt.show()``.
    """
    # value_counts() counts the occurrences of each distinct value and
    # returns them ordered by frequency, indexed by the value itself.
    counts = operational[trait].value_counts()
    plt.clf()
    plt.pie(counts)
    # Take the legend labels from the same Series that sized the wedges, so
    # each label is guaranteed to sit next to its own wedge. (Labels from a
    # separately sorted unique() list come in a different order.)
    plt.legend(counts.index, loc="upper right", bbox_to_anchor=(2.0, 0.9))
    plt.show()
# Create pie chart of roller coasters
pie(roller_coaster_data, "status")
# 9. Create a function to plot a scatter of any two columns
def scatter(dataframe, column1, column2):
    """Show a scatter plot of one column against another.

    Args:
        dataframe: DataFrame holding both columns.
        column1: Name of the column plotted on the x axis.
        column2: Name of the column plotted on the y axis.

    Returns:
        None. The chart is shown with ``plt.show()``.
    """
    plt.clf()
    print("\n Showing ", column1, " on the x axis, and ", column2, " on the y")
    plt.scatter(dataframe[column1], dataframe[column2])
    # Label the axes so the chart is readable without the printed caption.
    plt.xlabel(column1)
    plt.ylabel(column2)
    plt.show()
# Create a scatter plot of roller coaster speed vs height
scatter(roller_coaster_data, "speed", "height")

# Create a scatter plot of roller coaster height by speed
scatter(roller_coaster_data, "height", "speed")