Mastering Custom SageMaker Deployment: A Comprehensive Guide
A deep dive into the intricacies of deploying custom models to Amazon SageMaker
import pandas as pd
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
# set default figure size
plt.rcParams['figure.figsize'] = (15, 7.0)
heart_data = '../input/heart-attack-analysis-prediction-dataset/heart.csv'
heart_df = pd.read_csv(heart_data)
heart_df.head()
  | age | sex | cp | trtbps | chol | fbs | restecg | thalachh | exng | oldpeak | slp | caa | thall | output
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 63 | 1 | 3 | 145 | 233 | 1 | 0 | 150 | 0 | 2.3 | 0 | 0 | 1 | 1 |
1 | 37 | 1 | 2 | 130 | 250 | 0 | 1 | 187 | 0 | 3.5 | 0 | 0 | 2 | 1 |
2 | 41 | 0 | 1 | 130 | 204 | 0 | 0 | 172 | 0 | 1.4 | 2 | 0 | 2 | 1 |
3 | 56 | 1 | 1 | 120 | 236 | 0 | 1 | 178 | 0 | 0.8 | 2 | 0 | 2 | 1 |
4 | 57 | 0 | 0 | 120 | 354 | 0 | 1 | 163 | 1 | 0.6 | 2 | 0 | 2 | 1 |
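The column names are terse; as commonly documented for this Kaggle dataset, they stand for: age; sex (1 = male, 0 = female); cp (chest pain type, 0-3); trtbps (resting blood pressure, mm Hg); chol (serum cholesterol, mg/dl); fbs (fasting blood sugar > 120 mg/dl, 1 = true); restecg (resting ECG result, 0-2); thalachh (maximum heart rate achieved); exng (exercise-induced angina, 1 = yes); oldpeak (ST depression induced by exercise); slp (slope of the peak exercise ST segment); caa (number of major vessels, 0-4); thall (thalassemia test result); and output (1 = higher chance of a heart attack).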
# describe the data
heart_df.describe()
  | age | sex | cp | trtbps | chol | fbs | restecg | thalachh | exng | oldpeak | slp | caa | thall | output
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 | 303.000000 |
mean | 54.366337 | 0.683168 | 0.966997 | 131.623762 | 246.264026 | 0.148515 | 0.528053 | 149.646865 | 0.326733 | 1.039604 | 1.399340 | 0.729373 | 2.313531 | 0.544554 |
std | 9.082101 | 0.466011 | 1.032052 | 17.538143 | 51.830751 | 0.356198 | 0.525860 | 22.905161 | 0.469794 | 1.161075 | 0.616226 | 1.022606 | 0.612277 | 0.498835 |
min | 29.000000 | 0.000000 | 0.000000 | 94.000000 | 126.000000 | 0.000000 | 0.000000 | 71.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25% | 47.500000 | 0.000000 | 0.000000 | 120.000000 | 211.000000 | 0.000000 | 0.000000 | 133.500000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 2.000000 | 0.000000 |
50% | 55.000000 | 1.000000 | 1.000000 | 130.000000 | 240.000000 | 0.000000 | 1.000000 | 153.000000 | 0.000000 | 0.800000 | 1.000000 | 0.000000 | 2.000000 | 1.000000 |
75% | 61.000000 | 1.000000 | 2.000000 | 140.000000 | 274.500000 | 0.000000 | 1.000000 | 166.000000 | 1.000000 | 1.600000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 |
max | 77.000000 | 1.000000 | 3.000000 | 200.000000 | 564.000000 | 1.000000 | 2.000000 | 202.000000 | 1.000000 | 6.200000 | 2.000000 | 4.000000 | 3.000000 | 1.000000 |
# checking data types
heart_df.dtypes
age int64
sex int64
cp int64
trtbps int64
chol int64
fbs int64
restecg int64
thalachh int64
exng int64
oldpeak float64
slp int64
caa int64
thall int64
output int64
dtype: object
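Every column is already numeric, so no encoding is strictly required, but cp, restecg, slp, caa, and thall are integer-coded categoricals rather than true quantities. The notebook feeds them to the network as plain numbers; if you wanted to drop that artificial ordering, a one-hot variant (a sketch, not used below) would look like this:

# one-hot encode the integer-coded categorical columns (sketch)
X_onehot = pd.get_dummies(heart_df.drop('output', axis=1),
                          columns=['cp', 'restecg', 'slp', 'caa', 'thall'])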
# drop duplicate rows, if any (drop_duplicates returns a new frame, so reassign)
heart_df = heart_df.drop_duplicates()
# check for missing values
heart_df.isna().sum()
age 0
sex 0
cp 0
trtbps 0
chol 0
fbs 0
restecg 0
thalachh 0
exng 0
oldpeak 0
slp 0
caa 0
thall 0
output 0
dtype: int64
# check output column class distribution
sns.countplot(x='output', data=heart_df).set_title("output Column Distribution")
# check sex column class distribution
sns.countplot(x='sex', data=heart_df).set_title("Sex Column Distribution")
# box plot of cholesterol level by output class
sns.boxplot(x="output",y="chol",data=heart_df)
# box plot of maximum heart rate (thalachh) by output class
sns.boxplot(x="output",y="thalachh",data=heart_df)
# box plot of ST depression (oldpeak) by output class
sns.boxplot(x="output",y="oldpeak",data=heart_df)
# box plot of age by output class
sns.boxplot(x="output",y="age",data=heart_df)
# age distribution across patients
ax = sns.countplot(x='age', data=heart_df)
# check correlation
corr = heart_df.corr()
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5}).set_title("Columns Correlation")
# split data for training
y = heart_df.output.to_numpy()
X = heart_df.drop('output', axis=1).to_numpy()
# scale X values
scaler = StandardScaler()
X = scaler.fit_transform(X)
# split data while keeping output class distribution consistent
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
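One caveat about the two cells above: the scaler is fitted on the full matrix before splitting, so test-set statistics leak into the transform. A leakage-free variant (a sketch of the alternative, not what this notebook runs) splits first and fits the scaler on the training rows only:

# split first, then fit the scaler on training data only (sketch)
X_train, X_test, y_train, y_test = train_test_split(
    heart_df.drop('output', axis=1).to_numpy(),
    heart_df.output.to_numpy(),
    test_size=0.2, stratify=heart_df.output)
scaler = StandardScaler().fit(X_train)  # statistics come from the training split only
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)       # same transform applied to the held-out rows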
# convert numpy arrays to pytorch float tensors
def df_to_tensor(df):
    return torch.from_numpy(df).float()
X_traint = df_to_tensor(X_train)
y_traint = df_to_tensor(y_train)
X_testt = df_to_tensor(X_test)
y_testt = df_to_tensor(y_test)
# create pytorch dataset
train_ds = TensorDataset(X_traint, y_traint)
test_ds = TensorDataset(X_testt, y_testt)
# create data loaders
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
test_dl = DataLoader(test_ds, batch_size, shuffle=False)
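Before training, it is worth pulling a single batch as a quick sanity check that the loaders emit the expected shapes:

# inspect one batch from the training loader (sanity check)
feat, target = next(iter(train_dl))
print(feat.shape, target.shape)  # expect torch.Size([5, 13]) and torch.Size([5])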
# model architecture
class BinaryNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.l1 = nn.Linear(input_size, 64)
        self.l2 = nn.Linear(64, 32)
        self.l3 = nn.Linear(32, 16)
        self.out = nn.Linear(16, output_size)

    def forward(self, x):
        x = self.l1(x)
        x = F.relu(x)
        x = self.l2(x)
        x = F.relu(x)
        x = self.l3(x)
        x = F.relu(x)
        x = self.out(x)
        return torch.sigmoid(x)  # scaling values between 0 and 1
input_size = 13 # number of features
output_size = 1
model = BinaryNetwork(input_size, output_size)
loss_fn = nn.BCELoss() # Binary Cross Entropy
optim = torch.optim.Adam(model.parameters(), lr=1e-3)
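Since forward already ends in a sigmoid, nn.BCELoss is the matching criterion here. A common, more numerically stable alternative is to return raw logits from forward and let the loss apply the sigmoid internally:

# alternative (sketch): remove torch.sigmoid from forward() and use
loss_fn = nn.BCEWithLogitsLoss()  # fuses sigmoid + BCE, more numerically stable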
model
BinaryNetwork(
(l1): Linear(in_features=13, out_features=64, bias=True)
(l2): Linear(in_features=64, out_features=32, bias=True)
(l3): Linear(in_features=32, out_features=16, bias=True)
(out): Linear(in_features=16, out_features=1, bias=True)
)
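A quick shape check on dummy data confirms the network maps 13 features to a single probability per row:

# forward-pass sanity check on fake inputs (sketch)
with torch.no_grad():
    dummy = torch.randn(2, input_size)  # two fake patients
    print(model(dummy).shape)           # torch.Size([2, 1]), values in (0, 1)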
epochs = 100
losses = []
for i in range(epochs):
    epoch_loss = 0
    for feat, target in train_dl:
        optim.zero_grad()
        out = model(feat)
        loss = loss_fn(out, target.unsqueeze(1))  # match target shape to (batch, 1)
        epoch_loss += loss.item()
        loss.backward()
        optim.step()
    losses.append(epoch_loss)
    # print the most recent batch loss every 10 epochs
    if i % 10 == 0:
        print(f"Epoch: {i}/{epochs}, Loss = {loss:.5f}")
Epoch: 0/100, Loss = 0.79641
Epoch: 10/100, Loss = 0.03637
Epoch: 20/100, Loss = 0.07704
Epoch: 30/100, Loss = 0.02023
Epoch: 40/100, Loss = 0.00084
Epoch: 50/100, Loss = 0.00000
Epoch: 60/100, Loss = 0.00001
Epoch: 70/100, Loss = 0.00000
Epoch: 80/100, Loss = 0.00018
Epoch: 90/100, Loss = 0.00029
# plot losses
graph = sns.lineplot(x=list(range(epochs)), y=losses)
graph.set(title="Loss change during training", xlabel='epochs', ylabel='loss')
plt.show()
# evaluate the model
y_pred_list = []
model.eval()
with torch.no_grad():
    for X, y in test_dl:
        y_test_pred = model(X)
        y_pred_tag = torch.round(y_test_pred)  # threshold probabilities at 0.5
        y_pred_list.append(y_pred_tag)
# squeeze each batch of predictions down to one dimension
y_pred_list = [a.squeeze() for a in y_pred_list]
# confusion matrix (hstack concatenates the per-batch prediction tensors into one)
cfm = confusion_matrix(y_test, torch.hstack(y_pred_list))
sns.heatmap(cfm / np.sum(cfm), annot=True, fmt='.2%')
# print metrics
print(classification_report(y_test, torch.hstack(y_pred_list)))
              precision    recall  f1-score   support

           0       0.91      0.75      0.82        28
           1       0.82      0.94      0.87        33

    accuracy                           0.85        61
   macro avg       0.86      0.84      0.85        61
weighted avg       0.86      0.85      0.85        61
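The accuracy_score import never gets used above; it reproduces the headline number from the report. And since the goal is ultimately deployment, the natural last step is persisting the trained weights together with the fitted scaler, which is needed to preprocess requests at inference time (file names here are illustrative):

# accuracy matches the classification report
print(accuracy_score(y_test, torch.hstack(y_pred_list)))  # ~0.85

# persist artifacts for deployment (illustrative file names)
import joblib
torch.save(model.state_dict(), 'model.pth')  # network weights
joblib.dump(scaler, 'scaler.joblib')         # fitted StandardScaler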