import pandas as pd
import numpy as np
import os
import math
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# In[2]:
# Load the dataset; the path is resolved relative to the working directory.
df = pd.read_csv('cover_data.csv')
# In[28]:
# Quick inspection: the frame itself, missing values per column, and the
# 'class' column.
print(df)
# A bare `df.isna().sum()` expression discards its result here, so the counts
# were never shown; print them explicitly.
print(df.isna().sum())
print(df['class'])
# In[3]:
# The last column is used as the target; all preceding columns are features.
# NOTE(review): presumably the last column is the 'class' column printed
# above — confirm against the CSV header.
labels = df.iloc[:, -1]
features = df.iloc[:, :-1]
# In[5]:
print(labels)
# In[6]:
from sklearn.compose import ColumnTransformer

# Expand any categorical feature columns into indicator columns.
features = pd.get_dummies(features)

# Columns of these dtypes are standardised; all remaining columns are passed
# through untouched.
numeric_columns = features.select_dtypes(include=['float64', 'int64']).columns
ct = ColumnTransformer(
    [("only numeric", StandardScaler(), numeric_columns)],
    remainder='passthrough',
)

# Hold out 30% of the rows for testing, with a fixed seed for repeatability.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=42,
)

# Fit the transformer on the training rows only, then apply it to the test rows.
x_train = ct.fit_transform(x_train)
x_test = ct.transform(x_test)
# In[7]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# All three callbacks monitor the validation loss.
# Write 'weights.hdf5' only when the validation loss improves.
checkpoint = ModelCheckpoint(
    'weights.hdf5',
    monitor='val_loss',
    save_best_only=True,
)
# Scale the learning rate by 0.2 after 50 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=50,
    mode='min',
    verbose=0,
)
# Stop training after 100 epochs without improvement.
es = EarlyStopping(monitor='val_loss', patience=100)
# In[30]:
# Fully connected classifier: four ReLU hidden layers followed by a softmax
# output layer. The input width is the number of (encoded) feature columns.
my_model = tf.keras.models.Sequential(name="deep_trees")
my_input = tf.keras.layers.InputLayer(input_shape=(features.shape[1],))
my_model.add(my_input)
for units in (128, 128, 64, 32):
    my_model.add(tf.keras.layers.Dense(units, activation='relu'))
# NOTE(review): 8 output units, while only 7 class names are used for the
# reports — presumably the labels run 1..7 and index 0 is unused; confirm.
my_model.add(tf.keras.layers.Dense(8, activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted a stray "None" line.
my_model.summary()
my_opt = tf.keras.optimizers.Adam(learning_rate=0.01)
my_model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=my_opt,
)
# 20% of the training rows are held back by Keras as the validation set that
# the callbacks monitor.
history = my_model.fit(
    x_train,
    y_train,
    epochs=400,
    batch_size=512,
    validation_split=0.2,
    verbose=1,
    callbacks=[reduce_lr, checkpoint, es],
)
# With the metrics compiled above, evaluate() returns [loss, accuracy], so
# `acc` holds both values rather than the accuracy alone.
acc = my_model.evaluate(x_test, y_test, verbose=0)
print(acc)
# In[46]:
fig, axs = plt.subplots(1, 2, figsize=(15, 8))
ax1, ax2 = axs
plt.suptitle('Loss functions and Number of Epochs')

# Left panel: loss per epoch. Right panel: accuracy per epoch.
# Each entry is (axis, training key, validation key, y-axis label).
panels = (
    (ax1, 'loss', 'val_loss', 'sparse Categorical Crossentropy'),
    (ax2, 'accuracy', 'val_accuracy', 'Accuracy'),
)
for axis, train_key, val_key, y_label in panels:
    axis.plot(history.history[train_key], label='train')
    axis.plot(history.history[val_key], label='validation')
    axis.legend(loc="upper right")
    axis.set_xlabel('Number of Epochs')
    axis.set_ylabel(y_label)

# Report the test accuracy as a whole percentage, rounded down.
test_accuracy = my_model.evaluate(x_test, y_test, verbose=0)[1]
print("Testing accuracy is:", str(math.floor(test_accuracy * 100)) + "%")
# In[33]:
# For every test row, take the index of the highest predicted probability.
y_pred = np.argmax(my_model.predict(x_test), axis=1)
class_names = [
    'Spruce/Fir',
    'Lodgepole Pine',
    'Ponderosa Pine',
    'Cottonwood/Willow',
    'Aspen',
    'Douglas-fir',
    'Krummholz',
]
print(classification_report(y_test, y_pred, target_names=class_names))
# In[34]:
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import seaborn as sns

print('')
# Getting predictions for further evaluation.
preds = np.argmax(my_model.predict(x_test), axis=1)
# Show the first nine predicted classes, then the first nine true labels,
# each followed by a blank line.
for heading, values in (
    ('The predicted values are:', preds[:9]),
    ('The actual values are:', y_test[:9]),
):
    print(heading)
    print(values)
    print('')
# In[35]:
def plot_heatmap(class_names, y_pred, y_test):
    """Draw the confusion matrix of *y_pred* against *y_test* as a heatmap.

    Args:
        class_names: Tick labels applied to both axes, in the same order as
            the rows/columns of the confusion matrix.
        y_pred: Predicted class labels.
        y_test: True class labels.
    """
    cm = confusion_matrix(y_test, y_pred)
    _, ax = plt.subplots(figsize=(15, 15))
    # fmt='g' keeps the annotated cell counts out of scientific notation.
    sns.heatmap(cm, fmt='g', cmap='Blues', annot=True, ax=ax)
    ax.set_xlabel('Predicted class')
    ax.set_ylabel('True class')
    ax.set_title('Confusion Matrix')
    ax.xaxis.set_ticklabels(class_names)
    ax.yaxis.set_ticklabels(class_names)
# In[36]:
# Restore the checkpointed weights, i.e. those with the lowest validation loss
# seen during training.
my_model.load_weights('weights.hdf5')
# In[37]:
class_names = [
    'Spruce/Fir',
    'Lodgepole Pine',
    'Ponderosa Pine',
    'Cottonwood/Willow',
    'Aspen',
    'Douglas-fir',
    'Krummholz',
]
# Predictions computed before the load came from the last-epoch weights, so
# loading the checkpoint had no effect on the plot. Recompute them so the
# heatmap reflects the restored model.
preds = np.argmax(my_model.predict(x_test), axis=1)
plot_heatmap(class_names, preds, y_test)