import csv
# The csv library reads and writes comma-separated-value (CSV) files,
# a plain-text table format that spreadsheet tools such as Excel can open and export.
# Before importing the dataset, it is crucial to inspect it ourselves first so that we become familiar with the data:
# 1) names of columns and rows.
#
#
# 2) any noticeable missing data.
#
#
# 3) types of values (numeric or categorical).
#
#
# Investigating these points lets us think in more depth about the analysis and about which information is useful to us. It also helps us plan how to import the data into our Python notebook.
# In[47]:
# Placeholder column lists, one per column of the dataset. Each name starts as
# its own empty list and is reassigned with real values once the CSV is loaded
# further down the file.
(
    ages,
    sexes,
    bmis,
    no_children,
    smoker_status,
    insurance_charge,
    regions,
) = [], [], [], [], [], [], []
# In[48]:
import pandas as pd

# Load the insurance dataset; the raw-string prefix keeps the backslashes of
# the Windows path literal.
df = pd.read_csv(
    r'C:\Users\jai yadav\OneDrive\Desktop\college assignments\python-portfolio-example-solution\insurance.csv'
)

# Convert each column of interest into a plain Python list, in this order:
# age, sex, bmi, children, smoker, region, charges.
ages, sexes, bmis, no_children, smoker_status, regions, insurance_charge = (
    df[column].tolist()
    for column in ('age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges')
)

# Bundle the column lists into one tuple and print it in full.
data_info = (
    ages,
    sexes,
    bmis,
    no_children,
    smoker_status,
    regions,
    insurance_charge,
)
print(data_info)
# In[49]:
class PatientsInfo:
    """Summary statistics over per-patient columns of the insurance dataset.

    Every constructor argument is one column of values. The class stores the
    columns as given (no copy, no length check) and each method reads only the
    column it needs.
    """

    def __init__(self, patients_ages, patients_sexes, patients_bmis, patients_num_children,
                 patients_smoker_statuses, patients_regions, patients_charges):
        """Store each column under an attribute with the same name.

        Args:
            patients_ages: ages; each value must be accepted by ``int()``.
            patients_sexes: sex labels; only ``'female'`` and ``'male'`` are
                counted by ``analyze_sexes``.
            patients_bmis: BMI values (stored, not analysed here).
            patients_num_children: number of children (stored, not analysed here).
            patients_smoker_statuses: smoker flags (stored, not analysed here).
            patients_regions: region labels; must be hashable for
                ``unique_regions``.
            patients_charges: charges; each value must be accepted by ``float()``.
        """
        self.patients_ages = patients_ages
        self.patients_sexes = patients_sexes
        self.patients_bmis = patients_bmis
        self.patients_num_children = patients_num_children
        self.patients_smoker_statuses = patients_smoker_statuses
        self.patients_regions = patients_regions
        self.patients_charges = patients_charges

    def analyze_ages(self):
        """Return the average patient age as a formatted string.

        Each age is converted with ``int()`` before summing and the mean is
        rounded to two decimal places.

        Returns:
            ``"Average Patient Age: <mean> years"``.

        Raises:
            ZeroDivisionError: if ``patients_ages`` is empty.
        """
        total_age = sum(int(age) for age in self.patients_ages)
        average_age = round(total_age / len(self.patients_ages), 2)
        return "Average Patient Age: " + str(average_age) + " years"

    def analyze_sexes(self):
        """Print how many patients are recorded as female and as male.

        Values other than the exact strings ``'female'`` and ``'male'`` are
        ignored. Nothing is returned; the two counts are written to stdout.
        """
        females = 0
        males = 0
        # Single pass so that any iterable of labels is consumed only once.
        for sex in self.patients_sexes:
            if sex == 'female':
                females += 1
            elif sex == 'male':
                males += 1
        print("Number of females: ", females)
        print("Number of males: ", males)

    def unique_regions(self):
        """Return the distinct regions in order of first appearance.

        Returns:
            A new list with one entry per distinct value of ``patients_regions``.
        """
        # dict keys keep insertion order, which gives an order-preserving
        # de-duplication without rescanning a result list for every element.
        return list(dict.fromkeys(self.patients_regions))

    def average_charges(self):
        """Return the average yearly medical charge as a formatted string.

        Each charge is converted with ``float()`` before summing and the mean
        is rounded to the hundredths place.

        Returns:
            ``"Average Yearly Medical Insurance Charges: <mean> dollars."``.

        Raises:
            ZeroDivisionError: if ``patients_charges`` is empty.
        """
        total_charges = sum(float(charge) for charge in self.patients_charges)
        average = round(total_charges / len(self.patients_charges), 2)
        return "Average Yearly Medical Insurance Charges: " + str(average) + " dollars."

    def create_dictionary(self):
        """Build, store and return a dictionary of all patient columns.

        Ages are converted to ``int`` in a new list; every other column is
        stored by reference. The result is also kept on
        ``self.patients_dictionary``.

        Returns:
            dict with keys ``"age"``, ``"sex"``, ``"bmi"``, ``"children"``,
            ``"smoker"``, ``"regions"`` and ``"charges"``.
        """
        # NOTE(review): "regions" is the only plural key; left as-is because
        # callers may already depend on it.
        self.patients_dictionary = {
            "age": [int(age) for age in self.patients_ages],
            "sex": self.patients_sexes,
            "bmi": self.patients_bmis,
            "children": self.patients_num_children,
            "smoker": self.patients_smoker_statuses,
            "regions": self.patients_regions,
            "charges": self.patients_charges,
        }
        return self.patients_dictionary
# In[50]:
# Build the analysis object from the column lists loaded earlier in this file.
patient_info = PatientsInfo(ages, sexes, bmis, no_children, smoker_status, regions, insurance_charge)
# In[51]:
# analyze_sexes() prints its own report and returns None, so it is called bare.
patient_info.analyze_sexes()
# In[52]:
# The remaining methods return their result instead of printing it. A bare
# expression is only echoed by a notebook cell, so print explicitly to get the
# output when this file runs as a plain script as well.
print(patient_info.analyze_ages())
# In[53]:
print(patient_info.unique_regions())
# In[54]:
print(patient_info.average_charges())