1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
class OttoDataset(Dataset):
    """Map-style dataset backed by a CSV file with ``id`` and ``target`` columns.

    The ``id`` column is discarded, every remaining column except ``target``
    becomes a feature, and ``target`` is encoded as pandas category codes.
    """

    def __init__(self, filepath):
        """Load *filepath* with pandas and split it into features and labels."""
        frame = pd.read_csv(filepath).drop(columns='id')
        feature_columns = [name for name in frame.columns if name != 'target']

        self.len = len(frame)
        # Raw NumPy arrays; no dtype conversion is done here.
        self.x_data = frame[feature_columns].values
        self.y_data = frame['target'].astype('category').cat.codes.values

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at *index*."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return self.len
# Training split wrapped in a DataLoader that yields shuffled batches of 64
# samples, loaded in the main process (num_workers=0).
train_df = OttoDataset(
    './dataset/otto-group-product-classification-challenge/train.csv'
)
train_loader = DataLoader(train_df, batch_size=64, shuffle=True, num_workers=0)
class Net(torch.nn.Module):
    """Four-layer fully connected classifier: 93 -> 64 -> 32 -> 16 -> 9."""

    def __init__(self):
        super(Net, self).__init__()
        self.linear1 = torch.nn.Linear(93, 64)
        self.linear2 = torch.nn.Linear(64, 32)
        self.linear3 = torch.nn.Linear(32, 16)
        self.linear4 = torch.nn.Linear(16, 9)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalised) class scores for a batch *x*.

        ReLU is applied after the first three linear layers only; the output
        of the last layer is returned as-is.
        """
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        return self.linear4(x)

    def predict(self, x):
        """Return one-hot class predictions for a batch *x* as a DataFrame.

        The result has one row per input sample and exactly one integer
        (0/1) column per output class, labelled ``0 .. n_classes - 1``, even
        when some class is never predicted.
        """
        with torch.no_grad():
            # Use the same computation as forward(): no ReLU on the final
            # layer, otherwise all-negative scores collapse to a tie at zero
            # and the arg-max no longer reflects the model's ranking.
            logits = self.forward(x)
            predicted = logits.argmax(dim=1)
            # A fixed num_classes keeps the column count stable regardless of
            # which classes actually occur in the predictions.
            one_hot = torch.nn.functional.one_hot(
                predicted, num_classes=self.linear4.out_features
            )
        return pd.DataFrame(one_hot.cpu().numpy())
# Model and the device it trains on: CUDA when available, otherwise the CPU.
model = Net()
cpu_device = torch.device("cpu")
device = torch.device("cuda") if torch.cuda.is_available() else cpu_device
model.to(device)

# Cross-entropy loss optimised with SGD plus momentum.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), momentum=0.5, lr=0.01)

# Per-epoch history used for the loss plot.
loss_list = []
epoch_list = []
def train(epoch):
    """Run one pass over ``train_loader`` and record the summed batch loss.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. Prints the 1-based epoch number followed by the summed loss,
    then appends *epoch* to ``epoch_list`` and the loss to ``loss_list``.
    """
    print("epoch:", epoch + 1, end=" ")
    running_loss = 0
    for features, labels in train_loader:
        # Move the batch to the training device and cast to the dtypes the
        # model (float) and the loss (long class indices) are called with.
        features = features.to(device).float()
        labels = labels.to(device).long()

        optimizer.zero_grad()
        batch_loss = criterion(model(features), labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(running_loss)
    epoch_list.append(epoch)
    loss_list.append(running_loss)
if __name__ == "__main__":
    # Train for a fixed number of epochs; train() records the loss history.
    for epoch in range(100):
        train(epoch)

    # Loss curve over the recorded epochs.
    plt.plot(epoch_list, loss_list)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.show()

    # Inference runs on the CPU over every test column except 'id'.
    model.to(cpu_device)
    test_df = pd.read_csv('./dataset/otto-group-product-classification-challenge/test.csv')
    inputs = torch.tensor(test_df.loc[:, test_df.columns != 'id'].values).float()
    outputs = model.predict(inputs)

    labels = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5',
              'Class_6', 'Class_7', 'Class_8', 'Class_9']
    # Guarantee one integer 0/1 column per class before renaming: a class
    # that is never predicted would otherwise be missing and make the column
    # assignment below fail with a length mismatch.
    outputs = outputs.reindex(columns=range(len(labels)), fill_value=0).astype(int)
    outputs.columns = labels
    outputs.insert(0, 'id', test_df['id'])

    output_path = './otto_dataset/my_predict' + str(min(loss_list)) + '.csv'
    # Create the target directory first so a finished training run is not
    # lost to a missing-folder error at write time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    outputs.to_csv(output_path, index=False)
|