# script.py
import seaborn
import pandas as pd
import numpy as np
import codecademylib3
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import codecademylib3
# Load the transaction records from the CSV export and take a first look.
transactions = pd.read_csv('transactions_modified.csv')
print(transactions.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
transactions.info()
# How many fraudulent transactions? Summing the boolean mask counts the rows
# where isFraud == 1 without materializing a filtered copy of the frame.
print((transactions["isFraud"] == 1).sum())
# Summary statistics on the amount column.
print(transactions[["amount"]].describe())
# Derive the engineered columns used as model inputs.
# isPayment: 1 when the transaction type is DEBIT or PAYMENT, otherwise 0.
payment_mask = transactions['type'].isin(['DEBIT', 'PAYMENT'])
transactions['isPayment'] = np.where(payment_mask, 1, 0)
# isMovement: 1 when the transaction type is CASH_OUT or TRANSFER, otherwise 0.
movement_mask = transactions['type'].isin(['CASH_OUT', 'TRANSFER'])
transactions['isMovement'] = np.where(movement_mask, 1, 0)
# accountDiff: destination's old balance minus origin's old balance.
# NOTE(review): this is a signed difference, so it goes negative whenever the
# origin balance is larger — confirm an absolute difference was not intended.
dest_balance = transactions['oldbalanceDest']
origin_balance = transactions['oldbalanceOrg']
transactions['accountDiff'] = dest_balance - origin_balance
# Columns fed to the model and the column it learns to predict.
features = ['amount', 'isPayment', 'isMovement', 'accountDiff']
label = 'isFraud'
# Hold out 30% of the rows for testing. The seed is fixed so the split — and
# therefore the scores and coefficients printed below — is the same on every
# run instead of changing with each execution.
X_train, X_test, y_train, y_test = train_test_split(
    transactions[features],
    transactions[label],
    test_size=0.3,
    random_state=42,
)
# Standardize the feature columns. The scaler is fitted on the training split
# only and then reused on the test split, so no test-set statistics leak into
# training.
ssc = StandardScaler()
X_train_scaled = ssc.fit_transform(X_train)
X_test_scaled = ssc.transform(X_test)
# Fit the logistic regression model to the scaled training data.
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)
# Score the model on the training data (mean accuracy).
training_score = lr.score(X_train_scaled, y_train)
print(f"Training score: {training_score}")
# Score the model on the held-out test data (mean accuracy).
test_score = lr.score(X_test_scaled, y_test)
print(f"Test score: {test_score}")
# Print the model coefficients, one per entry of `features`, in that order.
print(f"Model coefficients: {lr.coef_}")
# New transaction data. Each array holds one transaction's values in the same
# order as `features`: amount, isPayment, isMovement, accountDiff.
transaction1 = np.array([123456.78, 0.0, 1.0, 54670.1])
transaction2 = np.array([98765.43, 1.0, 0.0, 8524.75])
transaction3 = np.array([543678.31, 1.0, 0.0, 510025.5])
# Create a new transaction
transaction4 = np.array([600000.31, 0.0, 1.0, 68999999.0])
# Combine the new transactions into a single table, one row per transaction.
# The scaler was fitted on a DataFrame, so the samples are labeled with the
# same column names: handing it a bare ndarray makes recent scikit-learn
# releases warn about missing feature names and leaves column order unchecked.
sample_frame = pd.DataFrame(
    np.stack((transaction1, transaction2, transaction3, transaction4)),
    columns=features,
)
# Scale the new transactions with the statistics learned from the training set.
sample_transactions = ssc.transform(sample_frame)
# Predict fraud on the new transactions.
print(lr.predict(sample_transactions))
# Show the class probabilities for the new transactions.
print(lr.predict_proba(sample_transactions))