Biodiversity Project Python

Hey everyone! Let me know whether or not you like my code for the Park Biodiversity Dataset. Any feedback is helpful!


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency

# --- Load the data ----------------------------------------------------------
# The URLs previously carried GitHub access tokens ("?token=..."). Those are
# credentials: they expire quickly and must not be posted or committed, so the
# files are addressed by their plain raw URLs instead.
# NOTE(review): this requires the repository to be public -- otherwise
# download the two CSVs and point these constants at the local paths.
OBSERVATIONS_CSV = 'https://raw.githubusercontent.com/lapianahayden/Biodiversity_Project/main/observations.csv'
SPECIES_CSV = 'https://raw.githubusercontent.com/lapianahayden/Biodiversity_Project/main/species_info.csv'

observations = pd.read_csv(OBSERVATIONS_CSV)
print(observations.head())
species = pd.read_csv(SPECIES_CSV)
print(species.head())

# --- Summary statistics -----------------------------------------------------
obs_desc = observations.describe()
print(obs_desc)
spe_desc = species.describe()
print(spe_desc)

# Quick look at how often each conservation status occurs.
conserve_hist = species['conservation_status'].hist()

# --- Combine species info with the observation records ----------------------
# merge() with no `on=` joins on every column name the two tables share
# (inner join), so the result has one row per matching species/observation
# pair.
biodiversity = species.merge(observations)

# --- Is conservation status related to species category? --------------------
# The chi-square test of independence needs a table of *independent* counts.
# Counting rows of the merged table would count observation records, i.e. the
# same species once for every observation row it matched, which inflates the
# sample size and makes the p-value look far smaller than it should.
# Collapse to one row per distinct species/category/status first.
join_keys = [col for col in species.columns if col in observations.columns]
status_cols = ['category', 'conservation_status']
distinct_species = biodiversity.drop_duplicates(subset=join_keys + status_cols)

# groupby skips rows whose category or conservation_status is missing (pandas'
# default dropna=True), so species without a recorded status are not part of
# this test.
status_by_species_type = (
    distinct_species.groupby(status_cols)
    .size()
    .reset_index(name='species_count')
)
print(status_by_species_type)

# Rows = category, columns = conservation status, cells = number of species.
# Combinations that never occur are filled with 0.
contingency_table = pd.pivot_table(
    status_by_species_type,
    values='species_count',
    index='category',
    columns='conservation_status',
    aggfunc='sum',
    fill_value=0,
)
print(contingency_table)

chi2, p, dof, expected = chi2_contingency(contingency_table)
print(f'chi2 = {chi2}')
print(f'p-value = {p}')

# A p-value below the chosen significance level means the spread of
# conservation statuses differs between species categories by more than random
# chance would explain, i.e. category and conservation status are associated.
# NOTE(review): several cells of this table may hold very small counts; the
# chi-square approximation is less reliable in that case, so inspect
# `expected` before leaning on the result.
ALPHA = 0.05
verdict = 'is' if p < ALPHA else 'is not'
print(
    f'At alpha = {ALPHA}, the association between species category and '
    f'conservation status {verdict} statistically significant.'
)

# --- Which category is observed most in each park? --------------------------
species_per_park = (
    biodiversity.groupby(['park_name', 'category'])['observations']
    .sum()
    .reset_index()
)
print(species_per_park)

# Pick, for every park, the row holding the largest observation total.
# (Indexing the groupby with a tuple of columns raises on pandas >= 2.0, and
# taking .max() of each column separately would pair the alphabetically last
# category with the largest total rather than the category that owns it.)
top_row_labels = species_per_park.groupby('park_name')['observations'].idxmax()
max_species = species_per_park.loc[top_row_labels].reset_index(drop=True)
print(max_species)