#python #pandas #deep-learning #iterator #pytorch
#python #pandas #глубокое обучение #итератор #pytorch
Вопрос:
Я пытаюсь изучить PyTorch и создать свою первую нейронную сеть. Я использую пользовательский набор данных, вот образец данных:
ID_REF cg00001854 cg00270460 cg00293191 cg00585219 cg00702638 cg01434611 cg02370734 cg02644867 cg02879967 cg03036557 cg03123104 cg03670302 cg04146801 cg04570540 cg04880546 cg07044749 cg07135408 cg07303143 cg07475178 cg07553761 cg07917901 cg08016257 cg08548498 cg08715791 cg09334636 cg11153071 cg11441796 cg11642652 cg12256803 cg12352902 cg12541127 cg13313833 cg13500819 cg13975075 cg14061946 cg14086922 cg14224196 cg14530143 cg15456742 cg16230982 cg16734549 cg17166941 cg17290213 cg17292667 cg18266594 cg18335535 cg18584803 cg19273773 cg19378199 cg19523692 cg20115827 cg20558024 cg20608895 cg20899581 cg21186299 cg22115892 cg22454769 cg22549547 cg23098693 cg23193759 cg23500537 cg23606718 cg24079702 cg24888989 cg25090514 cg25344401 cg25635000 cg25726357 cg25743481 cg26019498 cg26647566 cg26792755 cg26928195 cg26940620 Age
0 0.252486 0.284724 0.243242 0.200685 0.904132 0.102795 0.473919 0.264084 0.367480 0.671434 0.075955 0.329343 0.217375 0.210861 1.000000 0.356048 0.577945 0.557148 0.249014 0.847134 0.254539 0.319858 0.220589 0.796789 0.361994 0.296101 0.105965 0.239796 0.169738 0.357586 0.365674 0.132575 0.250932 0.283227 1.000000 0.262259 0.208146 0.290623 0.113049 0.255710 0.555382 0.281046 0.168826 0.492007 0.442871 0.509569 0.219183 0.641244 0.339088 0.164062 0.227678 0.340220 0.541491 0.423010 0.621303 0.243750 0.869947 0.124120 0.317660 0.985243 0.645869 0.590888 0.841485 0.825372 0.904037 0.407343 0.223722 0.352113 0.855653 0.289593 0.428849 0.719758 0.800240 0.473586 68
1 0.867671 0.606590 0.803673 0.845942 0.086222 0.996915 0.871998 0.791823 0.877639 0.095326 0.857108 0.959701 0.688322 0.650640 0.062329 0.920434 0.687537 0.193038 0.891809 0.273775 0.583457 0.793486 0.798427 0.102910 0.773496 0.658568 0.759050 0.754877 0.787817 0.585895 0.792240 0.734543 0.854528 0.735642 0.389495 0.736709 0.600386 0.775989 0.819579 0.696350 0.110374 0.878199 0.659849 0.716714 0.771206 0.870711 0.919629 0.359592 0.677752 0.693433 0.683448 0.792423 0.933971 0.170669 0.249908 0.879879 0.111498 0.623053 0.626821 0.000000 0.157429 0.197567 0.160809 0.183031 0.202754 0.597896 0.826429 0.886736 0.086038 0.844088 0.761793 0.056548 0.270670 0.940083 21
2 0.789439 0.594060 0.857086 0.633195 0.000000 0.953293 0.832107 0.692119 0.641294 0.169303 0.935807 0.674698 0.789146 0.796555 0.208590 0.791318 0.777537 0.221895 0.804405 0.138006 0.738616 0.758083 0.749127 0.180998 0.769312 0.592938 0.578885 0.896125 0.553588 0.781393 0.898768 0.705339 0.861029 0.966552 0.274496 0.575738 0.490313 0.951172 0.833724 0.901890 0.115235 0.651489 0.619196 0.760758 0.902768 0.835082 0.610065 0.294962 0.907979 0.703284 0.775867 0.910324 0.858090 0.190595 0.041909 0.792941 0.146005 0.615639 0.761822 0.254161 0.101765 0.343289 0.356166 0.088915 0.114347 0.628616 0.697758 0.910687 0.133282 0.775737 0.809420 0.129848 0.126485 0.875580 20
3 0.615803 0.710968 0.874037 0.771136 0.199428 0.861378 0.861346 0.695713 0.638599 0.158479 0.903668 0.758718 0.581146 0.857357 0.307756 0.977337 0.805049 0.188333 0.788991 0.312119 0.706578 0.782006 0.793232 0.288111 0.691131 0.758102 0.829221 1.000000 0.742666 0.897607 0.797869 0.803221 0.912101 0.736800 0.315636 0.760577 0.609101 0.733923 0.578598 0.796944 0.096960 0.924135 0.612601 0.727117 0.905177 0.776481 0.727865 0.429820 0.666803 0.924595 0.567474 0.752196 0.742709 0.303662 0.168286 0.720899 0.099313 0.595328 0.734024 0.268583 0.293437 0.244840 0.311726 0.213415 0.418673 0.819981 0.816660 0.684730 0.146797 0.686270 0.777680 0.087826 0.335125 1.000000 23
4 0.847329 0.735766 0.858018 0.896453 0.186994 0.831964 0.762522 0.840186 0.830930 0.199264 0.788487 0.912629 0.702284 0.838771 0.065271 0.959230 0.912387 0.377203 0.794480 0.207909 0.766246 0.582117 0.902944 0.301144 0.765401 0.715115 0.646735 0.812084 0.697886 0.714310 0.890658 0.826644 0.944022 0.729517 0.530379 0.756268 0.764899 0.914573 0.825766 0.673394 0.017316 0.949335 0.614375 0.650553 0.898788 0.685396 0.823348 0.210175 0.831852 0.829067 0.858212 0.916433 0.778864 0.241186 0.144072 0.889536 0.058360 0.703567 0.852496 0.094223 0.341236 0.284903 0.231957 0.125196 0.333207 0.752592 0.899356 0.839006 0.174601 0.937948 0.716135 0.000000 0.114062 0.969760 22
Я разделяю данные на обучающую, тестовую и валидационную выборки (train / test / val) следующим образом:
# Hold out 40 % of the rows, then split that remainder in half:
# 60 % train, 20 % test, 20 % validation.
train_df, rest_df = train_test_split(df, test_size=0.4)
test_df, val_df = train_test_split(rest_df, test_size=0.5)
# Features are every column except 'Age'; the 'Age' column is kept separately
# (presumably as the regression target).
x_train_tensor = torch.tensor(train_df.drop('Age', axis=1).to_numpy(), requires_grad=True)
y_train_tensor = torch.tensor(train_df['Age'].to_numpy())
x_test_tensor = torch.tensor(test_df.drop('Age', axis=1).to_numpy(), requires_grad=True)
y_test_tensor = torch.tensor(test_df['Age'].to_numpy())
x_val_tensor = torch.tensor(val_df.drop('Age', axis=1).to_numpy(), requires_grad=True)
y_val_tensor = torch.tensor(val_df['Age'].to_numpy())
# Batch size: one tenth of the number of training rows (integer division).
bs = len(train_df.index)//10
# NOTE(review): the loaders wrap the DataFrames themselves; the tensors built
# above are not used here. DataLoader fetches items by integer position
# (dataset[i]), while DataFrame[i] is a column-label lookup -- this is the
# source of the KeyError shown in the traceback further down.
train_dl = DataLoader(train_df, bs, shuffle=True)
test_dl = DataLoader(test_df, len(test_df), shuffle=False)
val_dl = DataLoader(val_df, bs, shuffle=False)
А вот сама сеть на данный момент (очень простая, просто чтобы проверить, работает ли она):
class Net(nn.Module):
    """Small fully connected network: input -> input//2 -> input//4 -> 1.

    Args:
        input_size: Number of input features. When omitted, it is derived
            from the module-level ``df`` as ``len(df.columns) - 1`` (every
            column except one), which is the original behaviour.
    """

    def __init__(self, input_size=None):
        super().__init__()
        if input_size is None:
            # Fall back to the global DataFrame so that ``Net()`` keeps working.
            input_size = len(df.columns) - 1
        self.fc1 = nn.Linear(input_size, input_size // 2)
        self.fc2 = nn.Linear(input_size // 2, input_size // 4)
        self.fc3 = nn.Linear(input_size // 4, 1)

    def forward(self, x):
        """Apply the three linear layers, each followed by ReLU.

        The last layer has a single output unit, so the result has a trailing
        dimension of size 1.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # NOTE(review): ReLU on the output layer clamps predictions to >= 0 and
        # has zero gradient for negative pre-activations -- confirm this is
        # intended for the final layer.
        x = F.relu(self.fc3(x))
        return x
# Instantiate the model and print its layer structure.
net = Net()
print(net)
А вот код, в последней строке которого я получаю ошибку:
# Loss criterion and optimizer for the network defined above.
loss = torch.nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
EPOCHS = 3
# Number of batches to pull from the loader in each epoch.
STEPS_PER_EPOCH = len(train_dl.dataset)//bs
print(train_dl.dataset)
for epoch in range(EPOCHS):
    # A DataLoader iterator is exhausted after one pass over the data, so a
    # fresh one is created at the start of every epoch.
    iterator = iter(train_dl)
    for s in range(STEPS_PER_EPOCH):
        print(iterator)
        # Use the built-in next(); the Python 2 style ``.next()`` alias is not
        # available on DataLoader iterators in newer PyTorch releases.
        # While train_dl wraps a DataFrame, this call is where the KeyError
        # shown in the traceback is raised.
        next(iterator)
ID_REF cg00001854 cg00270460 cg00293191 ... cg26928195 cg26940620 Age
29 0.781979 0.744825 0.744579 ... 0.242138 0.854054 19
44 0.185400 0.299145 0.160084 ... 0.638449 0.413286 69
21 0.085470 0.217421 0.277675 ... 0.863455 0.512334 75
4 0.847329 0.735766 0.858018 ... 0.114062 0.969760 22
20 0.457293 0.462984 0.323835 ... 0.584259 0.481060 68
33 0.784562 0.845031 0.958335 ... 0.122210 0.854005 19
25 0.258434 0.354822 0.405620 ... 0.677245 0.540463 70
27 0.737131 0.768188 0.897724 ... 0.203228 0.831175 20
37 0.002051 0.202403 0.134198 ... 0.753844 0.302229 70
10 0.737427 0.537413 0.614343 ... 0.464244 0.723953 23
0 0.252486 0.284724 0.243242 ... 0.800240 0.473586 68
32 0.927260 1.000000 0.853864 ... 0.261990 0.892503 18
7 0.035825 0.271602 0.236109 ... 1.000000 0.471256 69
17 0.000000 0.202986 0.132144 ... 0.874550 0.342981 79
18 0.282112 0.479775 0.218852 ... 0.908217 0.426143 79
11 0.708797 0.536074 0.721171 ... 0.048768 0.699540 27
15 0.686921 0.639198 0.858981 ... 0.305142 0.978350 24
38 0.246031 0.186011 0.235928 ... 0.754013 0.342380 70
30 0.814767 0.771483 0.437789 ... 0.000000 0.658354 18
43 0.247471 0.399231 0.271619 ... 0.895016 0.468336 72
46 0.000428 0.263164 0.163303 ... 0.567005 0.252806 76
3 0.615803 0.710968 0.874037 ... 0.335125 1.000000 23
5 0.777925 0.821814 0.636676 ... 0.233359 0.753266 20
34 0.316262 0.307535 0.203090 ... 0.570755 0.351226 73
23 0.133038 0.000000 0.208442 ... 0.631202 0.459593 76
6 0.746102 0.585211 0.626580 ... 0.311914 0.753994 25
1 0.867671 0.606590 0.803673 ... 0.270670 0.940083 21
47 0.444606 0.502357 0.207560 ... 0.987106 0.446959 71
[28 rows x 75 columns]
<torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7f166241c048>
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2645 try:
-> 2646 return self._engine.get_loc(key)
2647 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 13
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
6 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
-> 2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 13
Я действительно понятия не имею, что означает эта ошибка и где искать её причину.
Я был бы очень признателен за некоторые рекомендации, спасибо!
Ответ №1:
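Используйте массив NumPy вместо DataFrame. DataLoader обращается к элементам набора данных по целочисленной позиции (`dataset[i]`), а для DataFrame выражение `df[13]` означает поиск столбца с меткой 13 — отсюда и `KeyError: 13`. Преобразовать фрейм данных в массив NumPy можно с помощью `to_numpy()`: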
# Wrap plain NumPy arrays instead of DataFrames: arr[i] is the i-th row, so the
# integer indexing performed by DataLoader works.
# NOTE(review): to_numpy() is called on the whole frame, so each row still
# contains the 'Age' column alongside the features -- split it off per batch.
train_dl = DataLoader(train_df.to_numpy(), bs, shuffle=True)
test_dl = DataLoader(test_df.to_numpy(), len(test_df), shuffle=False)
val_dl = DataLoader(val_df.to_numpy(), bs, shuffle=False)