# My project submission for the "Analyze Data with Python" skill path. Thank you for any replies and comments.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp
# Load the table of conditions contributing to COVID-19 deaths.
# NOTE(review): the path starts with a bare backslash, so on Windows it is
# resolved against the root of the current drive -- confirm where the CSV
# actually lives before running this on another machine.
coviddata = pd.read_csv(r"\Conditions_Contributing_to_COVID-19_Deaths__by_State_and_Age__Provisional_2020-2023.csv")

# --- Deaths depending on age ---
# Overview of the raw data: column dtypes first, then summary statistics.
# info() prints by itself; describe() only returns a frame, so print it.
coviddata.info()
print(coviddata.describe())

# Total the numeric columns per age group.  numeric_only=True is explicit
# because pandas >= 2.0 defaults to numeric_only=False and would otherwise
# try to "sum" the text columns as well.
coviddataage = coviddata.groupby("Age Group", as_index=False).sum(numeric_only=True)
print(coviddataage)

# 25th, 50th and 75th percentile of the per-age-group death totals.
print(np.quantile(coviddataage["COVID-19 Deaths"], (0.25, 0.5, 0.75)))
def iqr(data):
    """Return the interquartile range of *data*.

    Args:
        data: Array-like of numeric values, as accepted by ``np.quantile``.

    Returns:
        The 0.75 quantile of *data* minus its 0.25 quantile.
    """
    # One quantile call yields both quartiles.
    lower, upper = np.quantile(data, (0.25, 0.75))
    return upper - lower
# Interquartile range of the per-age-group death totals.
print(iqr(coviddataage["COVID-19 Deaths"]))

# --- Visualization ---
fig = plt.figure(figsize=(10, 5))

# The row labelled 8 is left out of both charts.
# NOTE(review): presumably this is an aggregate row (e.g. an "all ages"
# total) -- verify, and consider selecting it by its "Age Group" value
# instead of a hard-coded index label.
coviddataagedropped = coviddataage.drop(8, axis=0)

# Pie chart: share of deaths per age group.  rotatelabels is a boolean flag,
# so pass True rather than a truthy number.
plt.pie(
    coviddataagedropped["COVID-19 Deaths"],
    labels=coviddataagedropped["Age Group"],
    textprops={"fontsize": 8},
    rotatelabels=True,
)
plt.title("COVID-19 death depending on age")
plt.annotate("biggest age Group", (1, -0.9))

# Bar chart of the same totals on a second, wider figure.
plt.figure(figsize=(15, 5))
plt.bar(coviddataagedropped["Age Group"], coviddataagedropped["COVID-19 Deaths"])
plt.xlabel("Age Group")
plt.ylabel("COVID-19 Deaths")

print(coviddataage.describe())

# --- Statistical tests ---
# One-sample t-test: is the difference to the group of young people
# significant?  The reference mean is hard-coded.
# NOTE(review): the sample is coviddataage, which still contains row 8 that
# the charts exclude -- confirm this is intended.
YOUNG_GROUP_MEAN_DEATHS = 66585.0
t_stat, p_value = ttest_1samp(
    coviddataage["COVID-19 Deaths"], popmean=YOUNG_GROUP_MEAN_DEATHS
)
print(t_stat)
print(p_value)

# Author's conclusion: COVID-19 deaths are strongly related to age, as can be
# seen in the charts and the interquartile range.  Moreover, when testing for
# the difference to the mean deaths of young people, the t-test shows a
# significant difference.

# Display both figures; without this a plain script run never shows them.
plt.show()