# Had a lot of fun using the fundamentals to play with the data. It took me about 2 hours to complete. I know I have a lot of repetitive code that I can replace with functions; I just need to figure that out.
import csv
# Column-wise storage: index i of every list describes the same CSV row.
age_list = []
sex_list = []
bmi_list = []
children_list = []
smoker_list = []
region_list = []
charges_list = []

# newline="" is what the csv module documentation asks for when opening a
# file for a reader, so that line endings inside quoted fields are handled
# by the csv parser rather than by the text layer.
with open("insurance.csv", newline="") as insurance_data:
    insurance_data_dict = csv.DictReader(insurance_data)
    for row in insurance_data_dict:
        # Numeric columns are converted to float (children included);
        # categorical columns are kept as the raw strings from the file.
        age_list.append(float(row['age']))
        sex_list.append(row['sex'])
        bmi_list.append(float(row['bmi']))
        children_list.append(float(row['children']))
        smoker_list.append(row['smoker'])
        region_list.append(row['region'])
        charges_list.append(float(row['charges']))

# Single lookup table over the same list objects built above.
patient_dictionary = {
    'Ages': age_list,
    'Sexes': sex_list,
    "BMIs": bmi_list,
    "Children": children_list,
    "Smoker": smoker_list,
    "Region": region_list,
    "Charges": charges_list,
}
class PatientData:
    """Summary statistics over column-wise patient records.

    Every constructor argument is a list with one entry per patient, and
    all lists are expected to be the same length and in the same order.
    Each public method returns a ready-to-print sentence (a ``str``).
    """

    def __init__(self, age, sex, bmi, children, smoker, region, charges):
        """Store the per-patient columns.

        Args:
            age: numeric ages.
            sex: strings; 'male' and 'female' are the values counted.
            bmi: numeric BMI values.
            children: numeric child counts.
            smoker: strings; 'yes' and 'no' are the values counted.
            region: region labels (stored, but not used by any method here).
            charges: numeric insurance charges.
        """
        self.age = age
        self.sex = sex
        self.bmi = bmi
        self.children = children
        self.smoker = smoker
        self.region = region
        self.charges = charges

    @staticmethod
    def _mean(values):
        """Return the arithmetic mean of ``values``, or 0.0 if it is empty."""
        count = len(values)
        # Guard the division so an empty group reports 0.0 instead of
        # raising ZeroDivisionError.
        return sum(values) / count if count else 0.0

    @staticmethod
    def _count(values, target):
        """Return how many entries of ``values`` are equal to ``target``."""
        return sum(1 for value in values if value == target)

    def population(self):
        """Return a sentence with the number of patients (length of ``age``)."""
        return "There are {} patients in this data set".format(len(self.age))

    def average_age(self):
        """Return a sentence with the mean of ``age`` (0.0 when empty)."""
        return "Average patient age: {}".format(str(self._mean(self.age)))

    def sex_dist(self):
        """Return a sentence with the counts of 'male' and 'female' entries."""
        num_males = self._count(self.sex, 'male')
        num_females = self._count(self.sex, 'female')
        return "There are {} males and {} females in this data set.".format(num_males, num_females)

    def average_bmi(self):
        """Return a sentence with the mean of ``bmi`` (0.0 when empty)."""
        return "Average Patient BMI: {}".format(str(self._mean(self.bmi)))

    def num_smokers(self):
        """Return a sentence with the counts of 'yes' and 'no' smoker entries."""
        smokers = self._count(self.smoker, 'yes')
        nonsmokers = self._count(self.smoker, 'no')
        return "There are {} smokers and {} non-smokers".format(smokers, nonsmokers)

    def average_num_children(self):
        """Return a sentence with the mean of ``children`` (0.0 when empty)."""
        return "The average patient has {} children".format(str(self._mean(self.children)))

    def average_charges(self):
        """Return a sentence with the mean of ``charges`` (0.0 when empty)."""
        return "Average Charges: {} dollars".format(str(self._mean(self.charges)))
# Whole-data-set report: build one PatientData over every row and print
# each summary sentence on its own line.
patient_data = PatientData(
    age_list,
    sex_list,
    bmi_list,
    children_list,
    smoker_list,
    region_list,
    charges_list,
)
for summary in (
    patient_data.population,
    patient_data.average_age,
    patient_data.average_bmi,
    patient_data.sex_dist,
    patient_data.num_smokers,
    patient_data.average_num_children,
    patient_data.average_charges,
):
    print(summary())
def _filter_patients(data, column, matches):
    """Return a new column dictionary holding only the matching rows.

    Args:
        data: dict mapping column name -> list, all lists row-aligned.
        column: name of the column whose value decides membership.
        matches: callable taking one value of ``column`` and returning a
            truthy result when that row should be kept.

    Returns:
        A dict with the same keys (in the same order) as ``data``; each
        list contains the kept rows in their original order.
    """
    # The row positions come from the data itself, so this works for any
    # number of rows rather than a hard-coded row count.
    kept_rows = [i for i, value in enumerate(data[column]) if matches(value)]
    return {key: [values[i] for i in kept_rows] for key, values in data.items()}


# One dictionary per demographic group, each with the same keys as
# patient_dictionary. A row can belong to several groups.
male_dictionary = _filter_patients(patient_dictionary, "Sexes", lambda sex: sex == 'male')
female_dictionary = _filter_patients(patient_dictionary, "Sexes", lambda sex: sex == 'female')
smoker_dictionary = _filter_patients(patient_dictionary, "Smoker", lambda answer: answer == 'yes')
nonsmoker_dictionary = _filter_patients(patient_dictionary, "Smoker", lambda answer: answer == 'no')
# Parents have more than zero children; non-parents have exactly zero.
parent_dictionary = _filter_patients(patient_dictionary, "Children", lambda count: count > 0)
nonparent_dictionary = _filter_patients(patient_dictionary, "Children", lambda count: count == 0)
# With one dictionary per demographic group, reuse PatientData to print the
# same set of summaries for each group.
def _patient_data_from(group):
    """Build a PatientData from a column dictionary keyed like patient_dictionary."""
    return PatientData(
        group["Ages"],
        group["Sexes"],
        group["BMIs"],
        group["Children"],
        group["Smoker"],
        group["Region"],
        group["Charges"],
    )


def _group_report(heading, data):
    """Return the heading followed by every summary sentence, one per line."""
    return '\n'.join([
        heading,
        data.population(),
        data.average_age(),
        data.average_bmi(),
        data.sex_dist(),
        data.num_smokers(),
        data.average_num_children(),
        data.average_charges(),
    ])


male_data = _patient_data_from(male_dictionary)
print(_group_report("For males, ", male_data))

female_data = _patient_data_from(female_dictionary)
print(_group_report("For females, ", female_data))

smoker_data = _patient_data_from(smoker_dictionary)
print(_group_report("For smokers, ", smoker_data))

nonsmoker_data = _patient_data_from(nonsmoker_dictionary)
print(_group_report("For non-smokers, ", nonsmoker_data))

parent_data = _patient_data_from(parent_dictionary)
print(_group_report("For Parents, ", parent_data))

nonparent_data = _patient_data_from(nonparent_dictionary)
print(_group_report("For non-Parents, ", nonparent_data))