Hello guys!
Here is my medical insurance project, and I would like to get some feedback from you. Thank you all very much!
import csv
# Read the whole CSV into memory as a list of rows, each row being a list of
# strings. newline='' is what the csv module documentation asks for when a
# file object is handed to csv.reader, so that line breaks embedded in quoted
# fields are interpreted correctly.
with open('insurance.csv', 'r', newline='') as insurance_file:
    data_file = csv.reader(insurance_file)
    # Materialise the rows while the file is still open; the reader is lazy.
    data_list = list(data_file)
#print(data_list) #To make sure if the transformation worked!
# Let's find the average age in the data set:
# In the CSV file, the data is stored like this: age,sex,bmi,children,smoker,region,charges
def delete_first_index(lst, index):
    """Return column ``index`` of ``lst`` with its first entry removed.

    ``lst`` is a sequence of rows. The value at position ``index`` is taken
    from every row, and the first collected value is then dropped (the header
    cell, when the first row is a header line). For example, ``index=0``
    gives the ages when rows follow the layout
    ``age,sex,bmi,children,smoker,region,charges``.
    """
    column = [row[index] for row in lst]
    return column[1:]
def calculate_totals(lst):
    """Return the integer sum of the values in ``lst``.

    Each value is converted with ``int()``, so numeric strings such as
    ``"19"`` are accepted. An empty ``lst`` gives 0.

    Raises:
        ValueError: if a value cannot be parsed as an integer.
    """
    # Iterate over the argument rather than a module-level list, so the
    # function totals whichever category it is given.
    return sum(int(value) for value in lst)
# Mean age over all patients: age is column 0, and delete_first_index drops
# the header cell before the values are totalled.
ages = delete_first_index(data_list, 0)
ages_sum = calculate_totals(ages)
average_age = ages_sum / len(ages)
print("The average age from this dataset is: {:.0f}".format(average_age))
# Find which regions contribute the most and the fewest patients
# (region is column 5 of each row).
regions = delete_first_index(data_list, 5)
nw_total, ne_total, sw_total, se_total = (
    regions.count(name)
    for name in ('northwest', 'northeast', 'southwest', 'southeast')
)
regions_dictionary = {
    'Northwest': nw_total,
    'Northeast': ne_total,
    'Southwest': sw_total,
    'Southeast': se_total,
}
# Iterating the dict yields region names; key=...get ranks them by count.
max_value = max(regions_dictionary, key=regions_dictionary.get)
print("The region where the majority of patients came from is {} with {} patients".format(max_value, regions_dictionary[max_value]))
min_value = min(regions_dictionary, key=regions_dictionary.get)
print("The region where we received less patients is {} with {} patients".format(min_value, regions_dictionary[min_value]))
# Average age of the patients who have exactly one child
# (children is column 3 of each row).
children = delete_first_index(data_list, 3)
# `ages` was built earlier from the same rows, so it pairs up with `children`
# position by position.
only_one_kid = [age for age, child in zip(ages, children) if child == '1']
total_only_one_child = sum(int(age) for age in only_one_kid)
avg_age_for_one_child_only = total_only_one_child / len(only_one_kid)
print("The average age for patients who has only 1 child is {:.0f}".format(avg_age_for_one_child_only))
# Head-count per sex (sex is column 1 of the CSV file).
sexes = delete_first_index(data_list, 1)
women = sexes.count('female')
# Every entry that is not exactly 'female' is counted as male.
men = len(sexes) - women
total_patients = women + men
def gender_percentage(x, total=None):
    """Return ``x`` as a percentage of ``total``.

    Args:
        x: Count of patients in the group of interest.
        total: Size of the whole population. When omitted, the module-level
            ``total_patients`` is used, so existing one-argument calls keep
            working unchanged.

    Returns:
        ``x * 100 / total`` as a float.

    Raises:
        ZeroDivisionError: if the total is 0.
    """
    if total is None:
        # Fall back to the script-wide patient count.
        total = total_patients
    return x * 100 / total
# Share of each sex among all patients, expressed as a percentage.
male_ratio, female_ratio = (gender_percentage(count) for count in (men, women))
print("Male ratio is {:.2f}%".format(male_ratio))
print("Female ratio is {:.2f}%".format(female_ratio))