Hi!

I just finalised my code for the “Predicting Credit Card Fraud” project on the Data Science: Machine Learning Specialist skill path, but my model score and predictions don’t add up (…they seem too good). I have tried to identify where my error is, but I can’t figure it out. I’ve enclosed my code and would love some help (…and feedback!).

```
import seaborn
import pandas as pd
import numpy as np
import codecademylib3
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the data
transactions = pd.read_csv('transactions.csv')
print(transactions.head(10))
# DataFrame.info is a method, so it has to be called to produce the
# column/dtype summary. It prints the summary itself and returns None,
# which is why it is not wrapped in print().
transactions.info()
# Summary statistics on amount column
print(transactions['amount'].describe())
# Create isPayment field
def cond_isPayment(x):
    """Return 1 if the transaction type counts as a payment, else 0.

    'PAYMENT' and 'DEBIT' map to 1; every other value maps to 0.
    """
    return 1 if x in ('PAYMENT', 'DEBIT') else 0
# Wrap the scalar classifier so it can be applied to the whole 'type'
# column in a single call.
func = np.vectorize(cond_isPayment)
# Store the 0/1 flags both as a new column and under the name isPayment.
transactions['isPayment'] = isPayment = func(transactions['type'])
print(transactions['isPayment'].head(10))
# Create isMovement field
def cond_isMovement(x):
    """Return 1 if the transaction type moves money out, else 0.

    'CASH_OUT' and 'TRANSFER' map to 1; every other value maps to 0.
    """
    return 1 if x in ('CASH_OUT', 'TRANSFER') else 0
# Wrap the scalar classifier so it can be applied to the whole 'type'
# column in a single call.
func = np.vectorize(cond_isMovement)
# Store the 0/1 flags both as a new column and under the name isMovement.
transactions['isMovement'] = isMovement = func(transactions['type'])
print(transactions['isMovement'].head())
# Create accountDiff field: oldbalanceOrg minus oldbalanceDest, row by row
transactions['accountDiff'] = transactions['oldbalanceOrg'].sub(
    transactions['oldbalanceDest']
)
print(transactions['accountDiff'].head())
# Create features and label variables
# X holds the four model inputs; y holds the isFraud label column.
X = transactions.loc[:, ['amount', 'isPayment', 'isMovement', 'accountDiff']]
print(X.head())
y = transactions['isFraud']
print(y.head())
# Split dataset
# Stratify on the label so the training and test sets keep the same
# fraud/non-fraud ratio as the full data; an unstratified split can leave
# one side with too few fraud rows to learn from or evaluate on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y
)
# Normalize the features variables
# The scaler is fitted on the training data only and then reused, unchanged,
# for the test data so both are expressed on the same scale.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Fit the model to the training data
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)
# NOTE(review): score() reports plain accuracy. If fraud is rare in this
# data set (not visible here -- confirm with y.value_counts()), a model that
# always predicts 0 would also score very high, so a high accuracy alone
# does not show that fraud is being detected.
# Score the model on the training data
print(lr.score(X_train_scaled, y_train))
# Score the model on the test data
print(lr.score(X_test_scaled, y_test))
# Print the model coefficients
print(lr.coef_)
# New transaction data
# Each row follows the column order used for X:
# amount, isPayment, isMovement, accountDiff
transaction1 = np.array([123456.78, 0.0, 1.0, 54670.1])
transaction2 = np.array([98765.43, 1.0, 0.0, 8524.75])
transaction3 = np.array([543678.31, 1.0, 0.0, 510025.5])
# Combine new transactions into a single array
sample_transactions = np.array([transaction1, transaction2, transaction3])
print(sample_transactions)
# Normalize the new transactions
# Use transform(), not fit_transform(): the scaler must keep the mean and
# standard deviation learned from the training data. Re-fitting it on these
# three rows would standardize them against each other, giving the model
# inputs on a different scale from the one it was trained on.
sample_transactions_scaled = scaler.transform(sample_transactions)
# Predict fraud on the new transactions
predicted_fraud = lr.predict(sample_transactions_scaled)
print(predicted_fraud)
# Show probabilities on the new transactions
predicted_prob_fraud = lr.predict_proba(sample_transactions_scaled)
print(predicted_prob_fraud)
```