# Hello, I will try to answer the question: how do socio-demographic variables impact medical insurance charges?

# The dependent variable is charges

# The independent variables are

```
# age
# sex
# BMI
# no. of children
# smoking
# region
```

# INTRODUCTION first, importing everything that is needed for this project

import csv

!pip install pandas

import pandas as pd

from decimal import *

# INTRODUCTION second, converting the csv file into a list of Python dictionaries below in order to be able to analyze this data later

# Load every row of insurance.csv as a dict keyed by the column headings.
# csv.DictReader leaves all values as strings; later code converts them.
# newline="" is the open() mode the csv module documentation asks for.
with open("insurance.csv", newline="") as insurance_file:
    dict_file = list(csv.DictReader(insurance_file))

# INTRODUCTION third, take a look only at the headings (columns) in the .csv file, which will be explored later

# Read only the first line of the file, which holds the column headings.
with open("insurance.csv") as insurance_file:
    variable = insurance_file.readline()

print(variable)

age,sex,bmi,children,smoker,region,charges

# INTRODUCTION fourth, let’s go through the basic statistics and then start the analysis

# Load the same file with pandas to get summary statistics of the numeric
# columns, rounded to two decimals (displayed as the notebook cell result).
df = pd.read_csv("insurance.csv")

df.describe().round(2)

# ANALYSIS

# creating new variables which will be used in the next steps

# Raw column values, one entry per row (kept as the strings read from the CSV).
ages = []
bmis = []
charges = []

# Sex: row counters and the matching rows.
is_male = 0
is_female = 0
male_list = []
female_list = []

# Children: total number of children plus rows grouped by child count.
children = 0
has_children_list = []
has_no_children_list = []
has_1_child_list = []
has_3_children_list = []

# Smoking: row counters and the matching rows.
is_smoker = 0
is_not_smoker = 0
smoker_list = []
not_smoker_list = []

# Region: row counters.
from_southwest = 0
from_southeast = 0
from_northwest = 0
from_northeast = 0

# Single pass over the CSV: collect the raw columns, update the category
# counters and sort each row into the per-group lists used further down.
# (The file used to be opened and parsed four separate times.)
with open("insurance.csv", newline="") as insurance_file:
    for row in csv.DictReader(insurance_file):
        # Raw column values stay as the strings read from the file.
        ages.append(row["age"])
        bmis.append(row["bmi"])
        charges.append(row["charges"])

        if row["sex"] == "male":
            is_male += 1
            male_list.append(row)
        elif row["sex"] == "female":
            is_female += 1
            female_list.append(row)

        if row["children"] == "0":
            has_no_children_list.append(row)
        else:
            # `children` is the total number of children over all rows.
            children += int(row["children"])
            has_children_list.append(row)
            if row["children"] == "1":
                has_1_child_list.append(row)
            elif row["children"] == "3":
                has_3_children_list.append(row)

        if row["smoker"] == "yes":
            is_smoker += 1
            smoker_list.append(row)
        elif row["smoker"] == "no":
            is_not_smoker += 1
            not_smoker_list.append(row)

        if row["region"] == "southwest":
            from_southwest += 1
        elif row["region"] == "southeast":
            from_southeast += 1
        elif row["region"] == "northwest":
            from_northwest += 1
        elif row["region"] == "northeast":
            from_northeast += 1

# let’s check whether age impacts charges

class ChargeByAge:
    """Group charges into age brackets and print the average of each bracket.

    The brackets are 18-29, 30-39, 40-49, 50-59 and 60+. The raw charge
    values of the most recent ``get_charges`` call are kept on the instance
    in ``charges_of_18`` ... ``charges_of_60``.
    """

    def __init__(self):
        # Buckets live on the instance: as class attributes the lists would
        # be shared by every ChargeByAge object.
        self.charges_of_18 = []
        self.charges_of_30 = []
        self.charges_of_40 = []
        self.charges_of_50 = []
        self.charges_of_60 = []

    def get_charges(self, one_list):
        """Print the average charge for each age bracket found in one_list.

        Args:
            one_list: iterable of row mappings with an 'age' value accepted
                by int() and a 'charges' value accepted by float().

        Brackets that receive no rows are skipped instead of raising
        ZeroDivisionError. Rows with an age below 18 fall in no bracket.
        """
        reports = (
            (self.charges_of_18,
             "The average charge for a person below 30 is {num}."),
            (self.charges_of_30,
             "The average charge for a person bewteen 30 and 39 is {num}."),
            (self.charges_of_40,
             "The average charge for a person bewteen 40 and 49 is {num}."),
            (self.charges_of_50,
             "The average charge for a person bewteen 50 and 59 is {num}."),
            (self.charges_of_60,
             "The average charge for a person over 60 {num}."),
        )

        # Start from empty buckets so a repeated call does not double count.
        for bucket, _ in reports:
            bucket.clear()

        for row in one_list:
            age = int(row['age'])
            if 18 <= age < 30:
                self.charges_of_18.append(row['charges'])
            elif 30 <= age < 40:
                self.charges_of_30.append(row['charges'])
            elif 40 <= age < 50:
                self.charges_of_40.append(row['charges'])
            elif 50 <= age < 60:
                self.charges_of_50.append(row['charges'])
            elif age >= 60:
                self.charges_of_60.append(row['charges'])

        for bucket, message in reports:
            if not bucket:
                continue
            total = sum(float(value) for value in bucket)
            # Rounding a Decimal keeps two decimal places in the output
            # (e.g. 14399.20 rather than 14399.2).
            output = round(Decimal(total / len(bucket)), 2)
            print(message.format(num=output))

# Print the average charge per age bracket for all rows held in dict_file.
average_by_age = ChargeByAge()

average_by_age.get_charges(dict_file)

The average charge for a person below 30 is 9182.49.

The average charge for a person bewteen 30 and 39 is 11738.78.

The average charge for a person bewteen 40 and 49 is 14399.20.

The average charge for a person bewteen 50 and 59 is 16495.23.

The average charge for a person over 60 21248.02.

# let’s check whether gender impacts charges

def average_charges_by_gender(one_list, gender):
    """Print the average of the 'charges' values in one_list.

    Args:
        one_list: rows (mappings) with a 'charges' value accepted by float().
        gender: label inserted into the printed sentence.

    Prints nothing when one_list is empty, since there is no average to
    report (this used to raise ZeroDivisionError).
    """
    if not one_list:
        return
    addition = sum(float(row["charges"]) for row in one_list)
    output = round(addition / len(one_list), 2)
    print("The average charges for a {gen} is ${res}.".format(gen=gender, res=str(output)))

# Compare the average charges of the male and female rows.
average_charges_by_gender(male_list, "male")

average_charges_by_gender(female_list, "female")

The average charges for a male is $13956.75.

The average charges for a female is $12569.58.

# let’s check whether BMI impacts charges

class ChargeByBMI:
    """Group charges into BMI brackets and print the average of each bracket.

    The brackets are below 20, 20 to below 30, 30 to below 40, and 40 or
    more. The raw charge values of the most recent ``get_charges`` call are
    kept on the instance in ``charges_of_0_20`` ... ``charges_of_40_55``.
    """

    def __init__(self):
        # Buckets live on the instance: as class attributes the lists would
        # be shared by every ChargeByBMI object.
        self.charges_of_0_20 = []
        self.charges_of_20_30 = []
        self.charges_of_30_40 = []
        self.charges_of_40_55 = []

    def get_charges(self, one_list):
        """Print the average charge for each BMI bracket found in one_list.

        Args:
            one_list: iterable of row mappings with 'bmi' and 'charges'
                values accepted by float().

        Brackets that receive no rows are skipped instead of raising
        ZeroDivisionError.
        """
        reports = (
            (self.charges_of_0_20,
             "The average charge for a person below 20 BMI is {num}."),
            (self.charges_of_20_30,
             "The average charge for a person bewteen 20 and 30 BMI is {num}."),
            (self.charges_of_30_40,
             "The average charge for a person bewteen 30 and 40 BMI is {num}."),
            (self.charges_of_40_55,
             "The average charge for a person over 40 BMI is {num}."),
        )

        # Start from empty buckets so a repeated call does not double count.
        for bucket, _ in reports:
            bucket.clear()

        for row in one_list:
            # Both bounds of every bracket are BMI values; the upper bounds
            # were previously compared against the row's age by mistake.
            bmi = float(row['bmi'])
            if bmi < 20:
                self.charges_of_0_20.append(row['charges'])
            elif bmi < 30:
                self.charges_of_20_30.append(row['charges'])
            elif bmi < 40:
                self.charges_of_30_40.append(row['charges'])
            else:
                self.charges_of_40_55.append(row['charges'])

        for bucket, message in reports:
            if not bucket:
                continue
            total = sum(float(value) for value in bucket)
            # Rounding a Decimal keeps two decimal places in the output.
            output = round(Decimal(total / len(bucket)), 2)
            print(message.format(num=output))

# Print the average charge per BMI bracket for all rows held in dict_file.
average_by_bmi = ChargeByBMI()

average_by_bmi.get_charges(dict_file)

The average charge for a person below 20 BMI is 8838.56.

The average charge for a person bewteen 20 and 30 BMI is 9325.10.

The average charge for a person bewteen 30 and 40 BMI is 12644.09.

The average charge for a person over 40 BMI is 16784.62.

# let’s check whether having children impacts charges

class Average_per_child():
    """Report the average charge of the rows with a given number of children."""

    def get_averages(self, one_dict, children):
        """Print the average charge of rows whose 'children' equals children.

        Args:
            one_dict: iterable of row mappings with 'children' and 'charges'
                values; 'charges' must be accepted by float().
            children: value compared with row['children'] using ==, so it
                must have the same type as the stored value (the callers in
                this file pass strings such as "0").

        Prints nothing when no row matches, since there is no average to
        report (this used to raise ZeroDivisionError).
        """
        matching = [
            float(row["charges"]) for row in one_dict
            if row["children"] == children
        ]
        if not matching:
            return
        result = round(sum(matching) / len(matching), 2)
        print("The average charge for the {child} child is ${res}.".format(child=children, res=result))

# Print the average charge for rows with 0, 1, 2 and 3 children.
averages = Average_per_child()

averages.get_averages(dict_file, "0")

averages.get_averages(dict_file, "1")

averages.get_averages(dict_file, "2")

averages.get_averages(dict_file, "3")

The average charge for the 0 child is $12365.98.

The average charge for the 1 child is $12731.17.

The average charge for the 2 child is $15073.56.

The average charge for the 3 child is $15355.32.

# let’s check whether smoking impacts charges

def smoker_impact(one_list):
    """Print the average of the 'charges' values in one_list, labelled as smokers.

    Args:
        one_list: rows (mappings) with a 'charges' value accepted by float().

    Prints nothing when one_list is empty, since there is no average to
    report (this used to raise ZeroDivisionError).
    """
    if not one_list:
        return
    sum_charges = sum(float(row["charges"]) for row in one_list)
    average = round(sum_charges / len(one_list), 2)
    print("The average charge for a smoker in this dataset is ${a}.".format(a=average))

def non_smoker_impact(one_list):
    """Print the average of the 'charges' values in one_list, labelled as non-smokers.

    Args:
        one_list: rows (mappings) with a 'charges' value accepted by float().

    Prints nothing when one_list is empty, since there is no average to
    report (this used to raise ZeroDivisionError).
    """
    if not one_list:
        return
    sum_charges = sum(float(row["charges"]) for row in one_list)
    average = round(sum_charges / len(one_list), 2)
    print("The average charge for a non smoker in this dataset is ${a}.".format(a=average))

# Print the average charge of the smoker rows, then of the non-smoker rows.
smoker_impact(smoker_list)

non_smoker_impact(not_smoker_list)

The average charge for a smoker in this dataset is $32050.23.

The average charge for a non smoker in this dataset is $8434.27.

# let’s check whether region impacts charges

class Average_per_region():
    """Report the average charge of the rows belonging to a given region."""

    def get_averages(self, one_dict, region):
        """Print the average charge of rows whose 'region' equals region.

        Args:
            one_dict: iterable of row mappings with 'region' and 'charges'
                values; 'charges' must be accepted by float().
            region: region name compared with row['region'] using ==.

        Prints nothing when no row matches, since there is no average to
        report (this used to raise ZeroDivisionError).
        """
        matching = [
            float(row["charges"]) for row in one_dict
            if row["region"] == region
        ]
        if not matching:
            return
        result = round(sum(matching) / len(matching), 2)
        print("The average charge for the {reg} region is ${res}.".format(reg=region, res=result))

# Print the average charge for each of the four regions in the dataset.
averages = Average_per_region()

averages.get_averages(dict_file, "southeast")

averages.get_averages(dict_file, "southwest")

averages.get_averages(dict_file, "northwest")

averages.get_averages(dict_file, "northeast")

The average charge for the southeast region is $14735.41.

The average charge for the southwest region is $12346.94.

The average charge for the northwest region is $12417.58.

The average charge for the northeast region is $13406.38.

# That’s all. Thanks for reading.