Я пытаюсь использовать VGG16 для трансферного обучения (transfer learning), но получаю ошибку:
# Load VGG16 with pretrained weights and print its layer structure.
model = torchvision.models.vgg16(pretrained=True)
print(model)

# Freeze every parameter of the loaded network.
for param in model.parameters():
    param.requires_grad = False

# Swap ONLY the first classifier layer for a small block that outputs 2 features.
# NOTE(review): classifier[1:] is left untouched, so the original
# Linear(4096, 4096) at classifier[3] now receives a (64, 2) tensor -- this is
# the shape mismatch reported by the RuntimeError when summary() runs.
input_size = model.classifier[0].in_features
new_first_layer = nn.Sequential(
    nn.Linear(input_size, 128),
    nn.ReLU(),
    nn.Linear(128, 2),
)
model.classifier[0] = new_first_layer

# Run a forward pass on a (64, 3, 224, 224) input and print the summary.
torchinfo.summary(model, (64, 3, 224, 224))
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
~/.local/lib/python3.8/site-packages/torchinfo/torchinfo.py in forward_pass(model, x, batch_dim, cache_forward_pass, device, **kwargs)
    260 if isinstance(x, (list, tuple)):
--> 261 _ = model.to(device)(*x, **kwargs)
    262 elif isinstance(x, dict):

~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
   1103 # Do not call functions when jit is used

~/.local/lib/python3.8/site-packages/torchvision/models/vgg.py in forward(self, x)
     51 x = torch.flatten(x, 1)
---> 52 x = self.classifier(x)
     53 return x

~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1119
-> 1120 result = forward_call(*input, **kwargs)
   1121 if _global_forward_hooks or self._forward_hooks:

~/.local/lib/python3.8/site-packages/torch/nn/modules/container.py in forward(self, input)
    140 for module in self:
--> 141 input = module(input)
    142 return input

~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1119
-> 1120 result = forward_call(*input, **kwargs)
   1121 if _global_forward_hooks or self._forward_hooks:

~/.local/lib/python3.8/site-packages/torch/nn/modules/linear.py in forward(self, input)
    102 def forward(self, input: Tensor) -> Tensor:
--> 103 return F.linear(input, self.weight, self.bias)
    104

~/.local/lib/python3.8/site-packages/torch/nn/functional.py in linear(input, weight, bias)
   1847 return handle_torch_function(linear, (input, weight, bias), input, weight, bias=bias)
-> 1848 return torch._C._nn.linear(input, weight, bias)
   1849

RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x2 and 4096x4096)

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_8204/406510959.py in <module>
     11 nn.Linear(128, 2))
     12
---> 13 torchinfo.summary(model, (64, 3, 224, 224))

~/.local/lib/python3.8/site-packages/torchinfo/torchinfo.py in summary(model, input_size, input_data, batch_dim, cache_forward_pass, col_names, col_width, depth, device, dtypes, row_settings, verbose, **kwargs)
    192 input_data, input_size, batch_dim, device, dtypes
    193 )
--> 194 summary_list = forward_pass(
    195 model, x, batch_dim, cache_forward_pass, device, **kwargs
    196 )

~/.local/lib/python3.8/site-packages/torchinfo/torchinfo.py in forward_pass(model, x, batch_dim, cache_forward_pass, device, **kwargs)
    268 except Exception as e:
    269 executed_layers = [layer for layer in summary_list if layer.executed]
--> 270 raise RuntimeError(
    271 "Failed to run torchinfo. See above stack traces for more details. "
    272 f"Executed layers up to: {executed_layers}"

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [Sequential: 1, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, Conv2d: 2, ReLU: 2, MaxPool2d: 2, AdaptiveAvgPool2d: 1, Sequential: 2, Linear: 3, ReLU: 3, Linear: 3, ReLU: 2, Dropout: 2]
Я использую следующие версии пакетов PyTorch:
torch==1.10.0 torchinfo==1.5.3 torchvision==0.11.1
- Что пошло не так?
- Что нужно изменить, чтобы использовать VGG16 для трансферного обучения?
1. Судя по строке
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x2 and 4096x4096)
в логах, у вас, скорее всего, не согласуются размерности после замены model.classifier[0].
Ответ №1:
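Если вы хотите заменить итоговый классификатор, нужно заменить его целиком, а не только один слой. Сейчас новый блок на месте classifier[0] выдаёт тензор 64x2, а следующий за ним слой Linear(4096, 4096) по-прежнему ожидает 4096 признаков — отсюда и ошибка. Замените весь классификатор: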
# Replace the whole classifier head rather than a single layer of it, so none
# of the original 4096-wide layers remain after the new 2-output layer.
model.classifier = nn.Sequential(
    nn.Linear(input_size, 128),
    nn.ReLU(),
    nn.Linear(128, 2),
)