1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
class TitanicDataset(Dataset):
    """Titanic passenger CSV exposed as ``(features, label)`` tensor pairs.

    The CSV at ``filepath`` is read with pandas and reduced to the columns
    listed in ``FEATURES``.  Preprocessing:

    * missing ``Age`` and ``Fare`` values are replaced by the column median;
    * ``Cabin`` is collapsed to a "value present" flag;
    * missing ``Embarked`` values are treated as ``"S"``;
    * ``Sex``, ``Cabin`` and ``Embarked`` are one-hot encoded with a fixed
      category order (see ``CATEGORIES``).

    The encoded matrix always has 12 columns, in this order: ``Pclass``,
    ``Age``, ``SibSp``, ``Parch``, ``Fare``, Sex (male, female), Cabin
    (missing, present), Embarked (S, C, Q).

    Args:
        filepath: Path of a CSV file containing the ``FEATURES`` columns and
            a ``Survived`` column.

    Attributes set by ``__init__``:
        len: number of rows in the CSV.
        x_data: float64 tensor of shape ``(len, 12)`` with the features.
        y_data: tensor built from the ``Survived`` column.
    """

    FEATURES = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Cabin", "Embarked"]

    # Fixed categories for the one-hot encoded columns.  Pinning them keeps
    # the feature width and column order independent of which values happen
    # to occur in a particular CSV file.
    CATEGORIES = {
        "Sex": ["male", "female"],
        "Cabin": [False, True],  # False: cabin missing, True: cabin recorded
        "Embarked": ["S", "C", "Q"],
    }

    def __init__(self, filepath):
        df = pd.read_csv(filepath)

        frame = df[self.FEATURES].copy()
        frame["Age"] = frame["Age"].fillna(frame["Age"].median())
        frame["Fare"] = frame["Fare"].fillna(frame["Fare"].median())
        frame["Cabin"] = frame["Cabin"].notnull()
        frame["Embarked"] = frame["Embarked"].fillna("S")
        for column, categories in self.CATEGORIES.items():
            frame[column] = pd.Categorical(frame[column], categories=categories)

        # dtype=float is required: pandas >= 2.0 emits bool dummy columns by
        # default, which turns the combined array into dtype=object and makes
        # torch.from_numpy raise a TypeError.
        encoded = pd.get_dummies(frame, columns=list(self.CATEGORIES), dtype=float)

        self.len = df.shape[0]
        self.x_data = torch.from_numpy(np.array(encoded, dtype=np.float64))
        self.y_data = torch.from_numpy(np.array(df["Survived"]))

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows read from the CSV."""
        return self.len
# Module-level training data: the CSV is parsed once, when this module is
# first executed.  Note that `train_df` is a TitanicDataset instance, not a
# pandas DataFrame, despite its name.
train_df = TitanicDataset('./dataset/train.csv')

# Reshuffled mini-batches of 32 samples; num_workers=0 is passed explicitly,
# as in the original call.
train_loader = DataLoader(
    train_df,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)
class TitanicModel(torch.nn.Module):
    """Four-layer fully connected binary classifier.

    Layer widths are ``in_features -> 9 -> 6 -> 3 -> 1``.  The three hidden
    layers use ReLU and the output layer uses a sigmoid, so ``forward``
    returns values in ``[0, 1]``.

    Args:
        in_features: Size of the last dimension of the input tensor.
            Defaults to 12, the width this script was written for.
    """

    def __init__(self, in_features=12):
        super().__init__()
        self.linear1 = torch.nn.Linear(in_features, 9)
        self.linear2 = torch.nn.Linear(9, 6)
        self.linear3 = torch.nn.Linear(6, 3)
        self.linear4 = torch.nn.Linear(3, 1)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for ``x``.

        Args:
            x: Floating-point tensor whose last dimension equals the
                ``in_features`` the model was built with.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        return self.sigmoid(self.linear4(x))

    def predict(self, x):
        """Return hard 0/1 predictions for ``x`` as a list of Python ints.

        Runs ``forward`` without gradient tracking and labels a sample 1
        when its output is at least 0.5, otherwise 0.

        Args:
            x: Same input as accepted by ``forward``.

        Returns:
            ``list[int]`` with one entry per output value.
        """
        # Reuse forward() rather than repeating the layer pipeline, so the
        # two code paths cannot drift apart.
        with torch.no_grad():
            probabilities = self.forward(x)
        # Flatten the trailing size-1 dimension and threshold in one
        # vectorised step instead of a per-element Python loop.
        return (probabilities.reshape(-1) >= 0.5).long().tolist()
# Module-level training objects used by the script body below.
# Binary cross-entropy matches the single sigmoid output of the network.
criterion = torch.nn.BCELoss()
model = TitanicModel()
# Plain SGD over every model parameter with a fixed learning rate.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.005)
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Training: 1000 passes over train_loader.  For each epoch the summed
    # per-batch loss is recorded so it can be plotted afterwards.
    # ------------------------------------------------------------------
    epoch_list = []
    loss_list = []
    for epoch in range(1000):
        loss_sum = 0
        for idx, (inputs, labels) in enumerate(train_loader):
            # The dataset tensors are not float32; cast so they match the
            # model parameters and the BCELoss target requirement.
            inputs = inputs.float()
            labels = labels.float()

            # Drop the trailing size-1 dimension so predictions line up
            # with the 1-D label batch.
            y_pred = model(inputs).squeeze(-1)
            loss = criterion(y_pred, labels)

            batch_loss = loss.item()
            loss_sum += batch_loss
            print(epoch, idx, batch_loss)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch_list.append(epoch)
        loss_list.append(loss_sum)

    plt.plot(epoch_list, loss_list)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.show()

    # The smallest epoch loss is embedded in the output file name.
    min_loss = min(loss_list)

    # ------------------------------------------------------------------
    # Inference on the test CSV.  Missing Age/Fare values take the column
    # median, Cabin becomes a "value present" flag, and missing Embarked
    # values are treated as "S".
    # ------------------------------------------------------------------
    test_df = pd.read_csv("./dataset/test.csv")
    features = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Cabin", "Embarked"]
    test_x = test_df[features].copy()
    test_x["Age"] = test_x["Age"].fillna(test_x["Age"].median())
    test_x["Fare"] = test_x["Fare"].fillna(test_x["Fare"].median())
    test_x["Cabin"] = test_x["Cabin"].notnull()
    test_x["Embarked"] = test_x["Embarked"].fillna("S")

    # Fixed categories keep the one-hot layout at 12 columns in the order
    # Sex (male, female), Cabin (missing, present), Embarked (S, C, Q), even
    # if some value never occurs in the test file.
    categories = {
        "Sex": ["male", "female"],
        "Cabin": [False, True],
        "Embarked": ["S", "C", "Q"],
    }
    for column, levels in categories.items():
        test_x[column] = pd.Categorical(test_x[column], categories=levels)

    # dtype=float is required: pandas >= 2.0 emits bool dummy columns by
    # default, which would make the combined array dtype=object and cause
    # torch.from_numpy to raise a TypeError.
    encoded = pd.get_dummies(test_x, columns=list(categories), dtype=float)
    test = torch.from_numpy(np.array(encoded, dtype=np.float64))

    y = model.predict(test.float())
    output = pd.DataFrame({'PassengerId': test_df.PassengerId, 'Survived': y})

    # Create the output directory first so a missing folder cannot discard
    # the predictions after the whole training run has finished.
    out_path = Path('./titanic_dataset/my_predict' + str(min_loss) + '.csv')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    output.to_csv(out_path, index=False)
|