My portfolio project: U.S. Medical Insurance Costs category

import csv

# Load every row of insurance.csv into `insurances`, keyed by a 1-based
# record number. All values are kept as the strings the csv module yields;
# the analysis functions convert them to numbers where needed.
# Columns: age,sex,bmi,children,smoker,region,charges
_COLUMNS = ('age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges')

insurances = {}
count = 0  # stays 0 when the file contains a header but no data rows
with open('insurance.csv', 'r', newline='') as csv_file:
    csvreader = csv.DictReader(csv_file, delimiter=',')
    for count, row in enumerate(csvreader, start=1):
        # Copy only the known columns so each record is a plain dict.
        insurances[count] = {column: row[column] for column in _COLUMNS}
print('Loaded ' + str(count) + ' records.')

def calc_average_cost(dictionary):
    """Return the mean of the "charges" field over all records.

    Args:
        dictionary: Mapping of record id to a record dict whose "charges"
            value can be converted with ``float``.

    Returns:
        The average charge rounded to 2 decimal places, or 0.0 when
        *dictionary* is empty.
    """
    if not dictionary:
        # Avoid dividing by zero when there are no records.
        return 0.0
    # Read from the argument rather than a module-level variable so the
    # function works on any data set passed to it.
    total_cost = sum(float(record["charges"]) for record in dictionary.values())
    return round(total_cost / len(dictionary), 2)
def calc_average_age(dictionary):
    """Return the mean of the "age" field over all records.

    Args:
        dictionary: Mapping of record id to a record dict whose "age"
            value can be converted with ``float``.

    Returns:
        The average age rounded to 2 decimal places, or 0.0 when
        *dictionary* is empty.
    """
    if not dictionary:
        # Avoid dividing by zero when there are no records.
        return 0.0
    # Read from the argument rather than a module-level variable so the
    # function works on any data set passed to it.
    total = sum(float(record["age"]) for record in dictionary.values())
    return round(total / len(dictionary), 2)

def get_regions(dictionary):
    """Summarise the records per region.

    Args:
        dictionary: Mapping of record id to a record dict with a "region"
            value and a "charges" value convertible with ``float``.

    Returns:
        A dict keyed by region name. Each value is a dict with the keys
        "region", "count_insurances" (number of records in the region) and
        "total_charges" (sum of charges, rounded to 2 decimal places).
    """
    regions = {}
    for record in dictionary.values():
        region = record['region']
        summary = regions.setdefault(
            region,
            {"region": region, "count_insurances": 0, "total_charges": 0.0},
        )
        summary["count_insurances"] += 1
        summary["total_charges"] += float(record["charges"])
    # Round once at the end: rounding the running total after every addition
    # would accumulate rounding error across the records of a region.
    for summary in regions.values():
        summary["total_charges"] = round(summary["total_charges"], 2)
    return regions

# Report: overall average charge and age, then a per-region summary.
print(str(calc_average_cost(insurances)))
print(str(calc_average_age(insurances)))
regional_data = get_regions(insurances)
print(regional_data)
for record in regional_data.values():
    print("Region: " + record['region'])
    print(" count of insurances: " + str(record['count_insurances']))
    print(" Total value: " + str(round(record['total_charges'], 2)))
    # Average charge per record within this region.
    print(" Average insurance : " + str(round(record['total_charges'] / record['count_insurances'], 2)))

I think it would be better if this was a jupyter notebook and pushed to a GitHub repository. It would also help if there was an intro, citation for the data, and a list of possible data questions, code output, along with a wrap up/conclusion of your findings. Remember, you’re doing EDA, and telling a story here.
Search the forums here for others that have completed this project and posted a GH link just to see how they’ve handled the project as an example.

1 Like

Thanks, I was trying to figure out how to post; I will prepare it better next time!