HW3 Use Pytorch to build NN

文章目录
  1. 1. Some Utils Functions Definition
  2. 2. Train Pytorch NN on Five Classification Data Sets
    1. 2.1. Testing part
    2. 2.2. Training on five binary classification data sets
  3. 3. Train Pytorch NN for Multi-class Data Sets

For a detailed description of the task and the datasets, along with the report and the source code, please see the GitHub repository.

Some Utils Functions Definition

1
2
3
4
5
6
7
# import useful packages
import time
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from scipy.io import loadmat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def generate_k_folders(dataset, k):
    """
    Generate K-folders

    The training samples are cut into k contiguous slices along dimension 0.
    Fold i uses slice i as its validation set and the remaining samples as its
    training set. Every slice has n // k samples except the last one, which
    also takes the remainder. The test set is passed through unchanged (the
    same objects are shared by all folds).

    Input: dataset - dictionary with the keys "x_train", "y_train", "x_test"
                     and "y_test"; the training entries must be torch tensors
                     indexed by sample along dimension 0
           k - number of folds, 1 <= k <= number of training samples
               (k = 1 is accepted but yields an empty training set)
    Output: a list of k dictionaries, each with the keys "x_train", "y_train",
            "x_val", "y_val", "x_test" and "y_test"
    Raises: ValueError if k is smaller than 1 or larger than the number of
            training samples
    """
    x = dataset["x_train"]
    y = dataset["y_train"]
    x_test = dataset["x_test"]
    y_test = dataset["y_test"]

    n_samples = x.shape[0]
    if k < 1 or k > n_samples:
        # k > n_samples would give empty validation folds, which only fails
        # much later (division by zero when computing validation accuracy).
        raise ValueError(
            "k must be between 1 and the number of training samples (%s), got %s"
            % (n_samples, k)
        )

    fold_size = n_samples // k
    k_folders = []

    for i in range(k):
        a = i * fold_size
        if i < k - 1:
            b = a + fold_size
            x_fold_train = torch.cat((x[:a], x[b:]), dim=0)
            y_fold_train = torch.cat((y[:a], y[b:]))
        else:
            # The last fold absorbs the n % k leftover samples, so nothing
            # follows the validation slice.
            b = n_samples
            x_fold_train = x[:a]
            y_fold_train = y[:a]

        k_folders.append({
            "x_train": x_fold_train,
            "y_train": y_fold_train,
            "x_val": x[a:b],
            "y_val": y[a:b],
            "x_test": x_test,
            "y_test": y_test,
        })

    return k_folders
1
2
3
4
5
6
7
8
9
10
11
12
def unzip_dataset(dataset):
    """
    Unpack a fold dictionary into its six splits

    Input: dataset - mapping with the keys "x_train", "y_train", "x_val",
                     "y_val", "x_test" and "y_test"
    Output: the tuple (x_train, y_train, x_val, y_val, x_test, y_test)
    """
    split_keys = ("x_train", "y_train", "x_val", "y_val", "x_test", "y_test")
    return tuple(dataset[key] for key in split_keys)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
def confusion_mat_evaluate(y_test, y_pred):
    """
    Score predicted labels against the true labels

    Accuracy is computed directly; precision, recall and F1 are computed with
    sklearn's "macro" averaging.

    Input: y_test - the true labels
           y_pred - the predicted labels
    Output: the tuple (accuracy, precision, recall, f1)
    """
    accuracy = metrics.accuracy_score(y_test, y_pred)

    # The three remaining scores share the same call signature.
    macro_scorers = (metrics.precision_score, metrics.recall_score, metrics.f1_score)
    precision, recall, f1 = (
        scorer(y_test, y_pred, average="macro") for scorer in macro_scorers
    )

    return accuracy, precision, recall, f1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
def read_bi_data(dataset, device=None):
    """
    Read binary-class dataset

    Each of the four arrays is converted to a float32 torch tensor and moved
    to the requested device.

    Input: dataset - mapping (for example the object returned by np.load) with
                     the numpy arrays 'train_X', 'train_Y', 'test_X', 'test_Y'
           device - torch device (or device string) to place the tensors on;
                    when omitted, CUDA is used if it is available and the CPU
                    otherwise
    Output: a dictionary with the tensors 'x_train', 'y_train', 'x_test' and
            'y_test'
    """
    if device is None:
        # Prefer the GPU as before, but stay usable on CPU-only machines.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def to_float_tensor(key):
        return torch.from_numpy(dataset[key]).to(device=device, dtype=torch.float32)

    return {
        'x_train': to_float_tensor('train_X'),
        'y_train': to_float_tensor('train_Y'),
        'x_test': to_float_tensor('test_X'),
        'y_test': to_float_tensor('test_Y'),
    }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def read_multi_data(data_dir="datasets/multi-class", device=None):
    """
    Read multi-class dataset

    Loads train_images.mat, train_labels.mat, test_images.mat and
    test_labels.mat from data_dir; inside each file the variable has the same
    name as the file. Images become float32 tensors. Labels become int64
    tensors that are transposed and have a trailing dimension of size 1
    removed, so a label matrix stored as (1, N) comes out as a vector of
    length N.

    Input: data_dir - directory that holds the four .mat files
           device - torch device (or device string) to place the tensors on;
                    when omitted, CUDA is used if it is available and the CPU
                    otherwise
    Output: a dictionary with the tensors 'x_train', 'y_train', 'x_test' and
            'y_test'
    """
    if device is None:
        # Prefer the GPU as before, but stay usable on CPU-only machines.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def load_tensor(name, dtype):
        array = loadmat("%s/%s.mat" % (data_dir, name))[name]
        return torch.from_numpy(array).to(device=device, dtype=dtype)

    def load_labels(name):
        return load_tensor(name, torch.int64).t().squeeze(dim=-1)

    return {
        'x_train': load_tensor("train_images", torch.float32),
        'y_train': load_labels("train_labels"),
        'x_test': load_tensor("test_images", torch.float32),
        'y_test': load_labels("test_labels"),
    }

Train Pytorch NN on Five Classification Data Sets

1
2
3
4
5
6
# Open the five binary-classification archives, in the order the names are listed
(
    breast_cancer_data,
    diabetes_data,
    digit_data,
    iris_data,
    wine_data,
) = map(
    np.load,
    (
        "datasets/bi-class/breast-cancer.npz",
        "datasets/bi-class/diabetes.npz",
        "datasets/bi-class/digit.npz",
        "datasets/bi-class/iris.npz",
        "datasets/bi-class/wine.npz",
    ),
)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def train_bi_nn_model(dataset, H_list, device, learning_rate=1e-2, iteration=5000):
    """
    Train and evaluate a one-hidden-layer binary classifier for each hidden size

    For every H in H_list a fresh network
    Linear(D_in, H) -> ReLU -> Linear(H, 1) -> Sigmoid
    is trained with SGD and binary cross entropy, using the whole training
    split in every iteration. Validation accuracy is measured before each
    parameter update; the parameters that reach the highest validation
    accuracy are copied, and that copy is restored for the final evaluation on
    the test split. A loss curve and a validation-accuracy curve are shown for
    every H.

    Input: dataset - fold dictionary accepted by unzip_dataset
           H_list - hidden-layer sizes to try
           device - torch device the model is moved to; the tensors in dataset
                    are used as they are, so they have to live on that device
           learning_rate - SGD step size
           iteration - number of parameter updates per model
    Output: numpy array of shape (len(H_list), 8); each row holds
            [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, training_time]
    """
    x_train, y_train, x_val, y_val, x_test, y_test = unzip_dataset(dataset)

    D_in = x_train.shape[1]
    D_out = 1

    # Binary Cross Entropy Loss
    loss_fn = torch.nn.BCELoss()

    # The result table
    # Each row preserves the related result of corresponding H
    res_table = np.zeros((len(H_list), 8))

    for res_table_ind, H in enumerate(H_list):

        model = torch.nn.Sequential(
            torch.nn.Linear(D_in, H),
            torch.nn.ReLU(),
            torch.nn.Linear(H, D_out),
            torch.nn.Sigmoid()
        ).to(device)

        loss_history = []
        accuracy_val = []
        best_accuracy_val = 0.0
        best_iteration = 0
        best_state = None

        # SGD optimizer
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

        t0 = time.time()
        for t in range(iteration):
            # squeeze 2D of shape (x, 1) to 1D of shape (x,)
            y_train_pred = model(x_train).squeeze(dim=-1)
            loss = loss_fn(y_train_pred, y_train)
            loss_history.append(loss.item())

            # Validation needs no gradients: turn probabilities into 0/1
            # labels and compare them with the truth.
            with torch.no_grad():
                y_val_label = (model(x_val).squeeze(dim=-1) >= 0.5).to(y_val.dtype)
                correct = (y_val_label == y_val).sum().item()
            accuracy = correct / y_val.shape[0]
            accuracy_val.append(accuracy)

            # Save the best parameters and best accuracy. The parameters are
            # cloned because the optimizer keeps updating the model in place;
            # keeping a reference to the model would just track the latest
            # weights. The first iteration always takes a snapshot so a model
            # exists even if the accuracy never rises above 0.
            if best_state is None or accuracy > best_accuracy_val:
                best_accuracy_val = accuracy
                best_iteration = t
                best_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }

            if t % 1000 == 0:
                print("iteration: %s/%s" % (t, iteration))

            optimizer.zero_grad()

            # Backward pass
            loss.backward()

            # Update parameters
            optimizer.step()

        # Compute training time (includes the per-iteration validation pass)
        t1 = time.time()
        print("training time = %s(s)" % (t1 - t0))

        # Use the best parameters to predict on testing dataset
        if best_state is not None:
            model.load_state_dict(best_state)
        with torch.no_grad():
            y_test_pred = model(x_test).squeeze(dim=-1)

        # Transfer to numpy arrays
        yt = y_test.cpu().detach().numpy()
        yp = y_test_pred.cpu().detach().numpy()

        # Compute AUC (Area Under ROC Curve) from the raw probabilities
        auc = metrics.roc_auc_score(yt, yp)

        # Compute accuracy, precision, recall and f1
        # probability >= 0.5 is the positive label, anything else the negative label
        yp_label = (yp >= 0.5).astype(yp.dtype)
        accuracy_test, precision, recall, f1 = confusion_mat_evaluate(yt, yp_label)

        # Save all the results in res_table
        res_table[res_table_ind] = np.array(
            [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, t1 - t0]
        )

        # Output the result
        print("The best model: iteration = %s" % (best_iteration))
        print("On validation dataset: accuracy = %s" % (best_accuracy_val))
        print("On testing dataset: accuracy = %s, auc = %s, precision = %s, recall = %s, f1 = %s" \
              % (accuracy_test, auc, precision, recall, f1))

        # Plot the loss curve and validation accuracy curve
        plt.figure(figsize=(15, 4))
        ax1 = plt.subplot(1, 2, 1)
        ax2 = plt.subplot(1, 2, 2)
        plt.sca(ax1)
        plt.title("H = %s" % H)
        plt.xlabel("iteration")
        plt.ylabel("loss")
        plt.plot(range(iteration), loss_history)
        plt.sca(ax2)
        plt.title("H = %s" % H)
        plt.xlabel("iteration")
        plt.ylabel("accuracy_val")
        plt.plot(range(iteration), accuracy_val)
        plt.show()

    # Return the result tables
    return res_table
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def framework_run(data, device, k, H_list, lr, iteration):
    """
    Run k-fold training of the binary classifier on one data set

    The data set is converted with read_bi_data, split into k folds with
    generate_k_folders, and train_bi_nn_model is run on every fold. The
    per-fold result tables are averaged.

    Input: data - raw binary-class data set accepted by read_bi_data
           device - torch device handed to train_bi_nn_model
           k - number of folds
           H_list - hidden-layer sizes to try
           lr - learning rate
           iteration - number of training iterations per model
    Output: numpy array of shape (len(H_list), 8), rounded to 3 decimals; each
            row holds the fold average of
            [H, best_accuracy_val, accuracy_test, auc, precision, recall, f1, training_time]
    """
    # Generate K-folders from the data set that was passed in (not from a
    # fixed global data set)
    k_folders = generate_k_folders(read_bi_data(data), k)

    # The result table
    # Each row preserves the related result of corresponding H
    res_table = np.zeros((len(H_list), 8))

    # Traverse every fold and sum up the result tables
    for index, folder in enumerate(k_folders):
        print("K-Folder index = %s" % index)
        res_table += train_bi_nn_model(folder, H_list, device, lr, iteration)

    # Return the average table
    return np.round(res_table / k, decimals=3)

Testing part

If this part runs successfully, all of the functions above work as expected.

1
# Use the GPU when one is available; otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
1
# Short run (two hidden sizes, 500 iterations) that exercises every function
framework_run(
    data=breast_cancer_data,
    device=device,
    k=5,
    H_list=[5, 6],
    lr=5e-3,
    iteration=500,
)

Training on five binary classification data sets

1
2
# Breast-cancer data: 10 features => chosen H* = 3, lr = 1e-2
framework_run(
    data=breast_cancer_data,
    device=device,
    k=5,
    H_list=list(range(1, 8)),
    lr=1e-2,
    iteration=2500,
)
H_breastNN Val_Accuracy Test_Accuracy AUC Precision Recall F1 Training Time
1 0.892 0.901 0.892 0.838 0.866 0.848 9.782(s)
2 0.973 0.963 0.997 0.962 0.957 0.96 9.643(s)
3 0.973 0.969 0.997 0.967 0.966 0.966 9.4(s)
4 0.965 0.966 0.998 0.964 0.962 0.963 10.327(s)
5 0.963 0.968 0.997 0.965 0.964 0.965 11.125(s)
6 0.965 0.968 0.997 0.965 0.964 0.965 11.011(s)
7 0.973 0.968 0.998 0.965 0.964 0.965 10.401(s)
1
2
# Digit data: 64 features => chosen H* = 10, lr = 2e-3
framework_run(
    data=digit_data,
    device=device,
    k=5,
    H_list=list(range(5, 11)),
    lr=2e-3,
    iteration=2500,
)
H_digitNN Val_Accuracy Test_Accuracy AUC Precision Recall F1 Training Time
5 0.914 0.859 0.987 0.818 0.8 0.793 9.569(s)
6 0.947 0.931 0.998 0.951 0.903 0.92 9.327(s)
7 0.927 0.935 0.997 0.956 0.908 0.924 9.11 (s)
8 0.941 0.929 0.998 0.951 0.9 0.918 11.058(s)
9 0.919 0.922 0.997 0.948 0.89 0.907 11.388(s)
10 0.956 0.94 0.997 0.958 0.915 0.93 10.615(s)
1
2
# Diabetes data: 8 features => chosen H* = 4, lr = 5e-2
framework_run(
    data=diabetes_data,
    device=device,
    k=5,
    H_list=list(range(1, 9)),
    lr=5e-2,
    iteration=2500,
)
H_diabetesNN Val_Accuracy Test_Accuracy AUC Precision Recall F1 Training Time
1 0.926 0.9 0.897 0.833 0.867 0.846 9.412(s)
2 0.973 0.965 0.997 0.961 0.961 0.961 9.183(s)
3 0.967 0.968 0.997 0.964 0.966 0.965 9.584(s)
4 0.971 0.969 0.997 0.966 0.967 0.966 9.897(s)
5 0.973 0.968 0.997 0.965 0.965 0.965 9.909(s)
6 0.976 0.965 0.997 0.961 0.962 0.961 10.188(s)
7 0.971 0.966 0.997 0.963 0.963 0.963 10.127(s)
8 0.971 0.968 0.997 0.963 0.966 0.965 9.489(s)
1
2
# Iris data: 4 features => chosen H* = 3, lr = 1e-2
framework_run(
    data=iris_data,
    device=device,
    k=5,
    H_list=list(range(1, 5)),
    lr=1e-2,
    iteration=2500,
)
H_irisNN Val_Accuracy Test_Accuracy AUC Precision Recall F1 Training Time
1 0.886 0.903 0.891 0.837 0.87 0.85 9.465(s)
2 0.962 0.965 0.997 0.963 0.959 0.961 9.192(s)
3 0.963 0.966 0.997 0.964 0.962 0.963 9.107(s)
4 0.965 0.963 0.998 0.962 0.957 0.96 9.198(s)
1
2
# Wine data: 13 features => chosen H* = 6, lr = 1e-3
framework_run(
    data=wine_data,
    device=device,
    k=5,
    H_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    lr=1e-3,
    iteration=2500,
)
H_wineNN Val_Accuracy Test_Accuracy AUC Precision Recall F1 Training Time
1 0.715 0.709 0.827 0.643 0.674 0.6 9.564(s)
2 0.822 0.781 0.979 0.678 0.691 0.66 9.17(s)
3 0.758 0.744 0.869 0.662 0.638 0.601 9.814(s)
4 0.864 0.841 0.979 0.901 0.778 0.791 10.202(s)
5 0.734 0.751 0.929 0.668 0.648 0.604 9.864(s)
6 0.861 0.834 0.997 0.902 0.765 0.773 9.523(s)
7 0.824 0.807 0.964 0.787 0.728 0.723 9.795(s)
8 0.87 0.815 0.996 0.892 0.737 0.748 9.733(s)
9 0.83 0.803 0.997 0.886 0.721 0.73 9.45(s)
10 0.813 0.799 0.997 0.885 0.715 0.716 10.286(s)

Train Pytorch NN for Multi-class Data Sets

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def train_multi_nn_model(dataset, L1_list, L2_list, device, learning_rate=1e-2, iteration=5000, num_classes=10):
    """
    Train and evaluate a two-hidden-layer classifier for each (L1, L2) pair

    For every combination of L1 and L2 a fresh network
    Linear(D_in, L1) -> ReLU -> Linear(L1, L2) -> ReLU -> Linear(L2, num_classes)
    is trained with SGD and cross entropy, using the whole training split in
    every iteration. D_in is the size of the last dimension of the training
    inputs. Validation accuracy is measured before each parameter update; the
    parameters that reach the highest validation accuracy are copied, and that
    copy is restored for the final evaluation on the test split. A loss curve
    and a validation-accuracy curve are shown for every combination.

    Input: dataset - fold dictionary accepted by unzip_dataset
           L1_list - sizes to try for the first hidden layer
           L2_list - sizes to try for the second hidden layer
           device - torch device the model is moved to; the tensors in dataset
                    are used as they are, so they have to live on that device
           learning_rate - SGD step size
           iteration - number of parameter updates per model
           num_classes - number of output classes
    Output: numpy array of shape (len(L1_list) * len(L2_list), 5); each row
            holds [L1, L2, best_accuracy_val, accuracy_test, training_time]
    """
    x_train, y_train, x_val, y_val, x_test, y_test = unzip_dataset(dataset)

    # Input width is taken from the data instead of being fixed to 784
    D_in = x_train.shape[-1]

    # Cross Entropy Loss: combines nn.LogSoftmax() and nn.NLLLoss()
    loss_fn = torch.nn.CrossEntropyLoss()

    # The result table
    # Each row preserves the related result of corresponding combination of L1 and L2
    res_table = np.zeros((len(L1_list) * len(L2_list), 5))
    res_table_ind = 0

    for L1 in L1_list:
        for L2 in L2_list:
            t0 = time.time()
            model = torch.nn.Sequential(
                torch.nn.Linear(D_in, L1),          # hidden layer1 = L1
                torch.nn.ReLU(),
                torch.nn.Linear(L1, L2),            # hidden layer2 = L2
                torch.nn.ReLU(),
                torch.nn.Linear(L2, num_classes),   # one raw score (logit) per class
            ).to(device)

            loss_history = []
            accuracy_val = []
            best_accuracy_val = 0.0
            best_state = None

            # SGD optimizer
            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

            for t in range(iteration):
                y_train_pred = model(x_train)
                loss = loss_fn(y_train_pred, y_train)
                loss_history.append(loss.item())

                # Validation needs no gradients: the class with the highest
                # score is the prediction.
                with torch.no_grad():
                    y_val_pred_label = torch.argmax(model(x_val), dim=1)
                    correct = (y_val_pred_label == y_val).sum().item()
                accuracy = correct / y_val.shape[0]
                accuracy_val.append(accuracy)

                # Save the best parameters and best accuracy. The parameters
                # are cloned because the optimizer keeps updating the model in
                # place; keeping a reference to the model would just track the
                # latest weights. The first iteration always takes a snapshot
                # so a model exists even if the accuracy never rises above 0.
                if best_state is None or accuracy > best_accuracy_val:
                    best_accuracy_val = accuracy
                    best_state = {
                        name: tensor.detach().clone()
                        for name, tensor in model.state_dict().items()
                    }

                if t % 200 == 0:
                    print("iteration: %s/%s" % (t, iteration))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Compute training time (includes the per-iteration validation pass)
            t1 = time.time()
            print("training time = %s(s)" % (t1 - t0))

            # Use the best parameters to predict on testing dataset
            if best_state is not None:
                model.load_state_dict(best_state)
            with torch.no_grad():
                y_test_pred_label = torch.argmax(model(x_test), dim=1)

            # Calculate accuracy on the testing dataset; the denominator is
            # the size of the test split, not of the validation split
            correct = (y_test_pred_label == y_test).sum().item()
            accuracy_test = correct / y_test.shape[0]

            # Save in result table
            res_table[res_table_ind] = np.array([L1, L2, best_accuracy_val, accuracy_test, t1 - t0])
            res_table_ind += 1

            print("best_accuracy_val = %s, accuracy_test = %s" % (best_accuracy_val, accuracy_test))

            # Plot the loss curve and validation accuracy curve
            fig = plt.figure(figsize=(15, 4))
            ax1 = plt.subplot(1, 2, 1)
            ax2 = plt.subplot(1, 2, 2)
            plt.sca(ax1)
            plt.title("L1 = %s, L2 = %s" % (L1, L2))
            plt.xlabel("iteration")
            plt.ylabel("loss")
            plt.plot(range(iteration), loss_history)
            plt.sca(ax2)
            plt.title("L1 = %s, L2 = %s" % (L1, L2))
            plt.xlabel("iteration")
            plt.ylabel("accuracy_val")
            plt.plot(range(iteration), accuracy_val)
            fig.tight_layout(pad=0.4, w_pad=3.0, h_pad=3.0)
            plt.show()

    # Return the result tables
    return res_table
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Split the multi-class training data into 5 cross-validation folds
k_folders = generate_k_folders(read_multi_data(), 5)

# Candidate sizes for the first (L1) and second (L2) hidden layer
L1_list = [50, 75, 100]
L2_list = [10, 15, 20]

# Accumulator for the per-fold result tables: one row per (L1, L2)
# combination, 5 columns => [L1, L2, best_accuracy_val, accuracy_test, training_time]
res_table = np.zeros((len(L1_list) * len(L2_list), 5))

for index, folder in enumerate(k_folders):
    print("K-Folder index = %s" % index)
    # Add this fold's table to the running sum
    res_table += train_multi_nn_model(folder, L1_list, L2_list, device, learning_rate=1e-4, iteration=1000)

# Average over the 5 folds, rounded to 3 decimals
(res_table / 5).round(decimals=3)
L1 L2 Val Accuracy Test Accuracy Training Time
50 10 0.454 0.221 11.726(s)
50 15 0.601 0.298 11.976(s)
50 20 0.704 0.352 12.125(s)
75 10 0.413 0.208 12.466(s)
75 15 0.615 0.304 12.493(s)
75 20 0.691 0.346 12.867(s)
100 10 0.529 0.256 13.829(s)
100 15 0.585 0.282 13.956(s)
100 20 0.651 0.32 13.946(s)