This project was good at challenging my ability to work with functions. I spent about 15 hours putting the code together. It was good work to practice declaring a class with methods, and calling those methods. I felt like the level of difficulty was good for the questions I was trying to solve.
I feel like I could have shortened up my print statements with another method, but I didn’t clean that part of the code.
I didn’t write specific questions in the code, but the implied goal is to distinguish the highest from the lowest average cost under each header. I built some methods that do a great job of sorting the information.
My code can be found here:
import csv
import statistics
import collections
#class for pulling information from each dictionary
class WranglingData:
    """Helpers for filtering and summarising rows read from a CSV file.

    Each method takes ``dictionary``: an iterable of row mappings (for
    example the rows produced by ``csv.DictReader``) keyed by column heading.
    """

    def __init__(self):
        # Attributes are (re)assigned by the individual methods; they are
        # initialised here so they always exist on a fresh instance.
        self.dictionary = None
        self.heading = None
        self.heading1 = None
        self.heading2 = None
        self.attribute1 = None
        self.attribute2 = None

    def init(self):
        """Backward-compatible alias for the original (misnamed) constructor."""
        self.__init__()

    def reading_dictionary(self, dictionary, heading):
        """Return the value stored under ``heading`` for every row.

        Rows that lack ``heading`` contribute ``None``.
        """
        self.dictionary = dictionary
        self.heading = heading
        return [line.get(heading) for line in dictionary]

    def filter_by_two(self, dictionary, heading1, attribute1, heading2,
                      attribute2='null'):
        """Return ``heading2`` values (as floats) of rows matching a filter.

        A row is selected when its ``heading1`` value equals ``attribute1``.
        ``attribute2`` is stored on the instance but is not used for
        filtering; it is kept so existing callers keep working.

        Raises:
            KeyError: if a selected row has no ``heading2`` column.
            ValueError: if a selected value cannot be converted to float.
        """
        self.dictionary = dictionary
        self.heading1 = heading1
        self.heading2 = heading2
        self.attribute1 = attribute1
        self.attribute2 = attribute2
        selected = [
            line[heading2]
            for line in dictionary
            if line.get(heading1) == attribute1
        ]
        return self.string_conversion(selected)

    def sort_by_age(self, dictionary, filter_by):
        """Return the mean of ``'charges'`` for each distinct ``filter_by`` value.

        Despite the name, any column heading can be used for grouping. The
        result maps each group value (as found in the rows) to the mean of
        that group's charges.
        """
        self.dictionary = dictionary
        grouped = {}
        for line in dictionary:
            grouped.setdefault(line.get(filter_by), []).append(
                float(line.get('charges'))
            )
        # Collapse each group's list of charges into its mean.
        return {
            group: statistics.mean(charges)
            for group, charges in grouped.items()
        }

    def print_dictionary(self, dictionary):
        """Print each key with its value rounded to two decimals.

        Keys are printed in ``sorted`` order. Returns ``None``.
        """
        for key in sorted(dictionary):
            print("Key: ", key, "Value: ", str(round(dictionary[key], 2)))

    def string_conversion(self, list_arg, new_type='float'):
        """Convert a list of strings to a list of ints or floats.

        ``new_type`` must be ``'int'`` or ``'float'``. For any other value a
        message is printed and an empty list is returned.
        """
        converters = {'int': int, 'float': float}
        converter = converters.get(new_type)
        if converter is None:
            print("Types can only be int or float")
            return []
        return [converter(item) for item in list_arg]
# Load every row of the insurance data set into memory as a list of dicts.
# The csv module expects files to be opened with newline=''.
with open('insurance.csv', newline='') as insurance_data:
    new_insurance_data = csv.DictReader(insurance_data)
    new_list = []
    for i in new_insurance_data:
        new_list.append(i)

health_data = WranglingData()

# ********* Do men or women pay more in insurance on average? *********
print("\n\nThe cost averages of men and women are: ")
print("Men are charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'sex', 'male', 'charges')), 2))
print("Women are charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'sex', 'female', 'charges')), 2))

# What is the most expensive region?
print("\n\nThe cost average breakdown by region is:")
print("Northwest is charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'region', 'northwest', 'charges')), 2))
print("Southwest is charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'region', 'southwest', 'charges')), 2))
print("Northeast is charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'region', 'northeast', 'charges')), 2))
print("Southeast is charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'region', 'southeast', 'charges')), 2))

# What is the premium for being a smoker?
print("\n\nThe cost average for smokers and non-smokers is :")
print("Smokers are charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'smoker', 'yes', 'charges')), 2))
print("Non-smokers are charged: ", round(statistics.mean(health_data.filter_by_two(new_list, 'smoker', 'no', 'charges')), 2))

# print_dictionary prints its own output and returns None, so it is called
# directly below; wrapping it in print() would emit a stray "None" line.

# What is the premium by age?
print("\n\nThe cost average by age is :")
health_data.print_dictionary(health_data.sort_by_age(new_list, 'age'))

# What is the premium by number of children?
print("\n\nThe cost average by number of children is :")
health_data.print_dictionary(health_data.sort_by_age(new_list, 'children'))

# What is the premium by body mass index?
print("\n\nThe cost average by body mass index is :")
health_data.print_dictionary(health_data.sort_by_age(new_list, 'bmi'))