PyTorch是目前最流行的深度学习框架之一,以动态图和Pythonic的API著称。本文从Tensor操作讲起,直到训练一个MNIST手写数字识别模型,覆盖PyTorch的核心概念。
1. Tensor基本操作
Tensor是PyTorch的核心数据结构,类似NumPy的ndarray,但支持GPU加速和自动求导。
import torch
# Creating tensors
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.zeros(3, 4)
c = torch.randn(3, 4)  # samples from the standard normal distribution
d = torch.arange(0, 10, 2)  # tensor([0, 2, 4, 6, 8])

# Shape manipulation
x = torch.randn(2, 3, 4)
print(x.shape)  # torch.Size([2, 3, 4])
y = x.reshape(6, 4)  # change the shape to 6 x 4
z = x.permute(2, 0, 1)  # reorder dimensions -> shape [4, 2, 3]

# Arithmetic (a and b are rebound to 1-D vectors of equal length here)
a = torch.tensor([1.0, 2.0, 3.0])
b = torch.tensor([4.0, 5.0, 6.0])
print(a + b)  # tensor([5., 7., 9.])
print(torch.dot(a, b))  # tensor(32.), the dot product
print(a @ b)  # same result: @ on two 1-D tensors is the dot product

# Matrix multiplication
m1 = torch.randn(3, 4)
m2 = torch.randn(4, 5)
result = m1 @ m2  # shape [3, 5]

# GPU acceleration: only move the tensor when a CUDA device is present
if torch.cuda.is_available():
    x_gpu = x.to("cuda")
    print(x_gpu.device)  # cuda:0
2. 自动求导(autograd)
PyTorch的自动求导机制是训练神经网络的基础:
# A scalar leaf tensor whose operations autograd should record
x = torch.tensor(3.0, requires_grad=True)
# y = x^2 + 2x + 1
y = x.pow(2) + x * 2 + 1
# Backpropagate from y to fill in x.grad
y.backward()
print(x.grad)  # tensor(8.), because dy/dx = 2x + 2 = 2*3 + 2 = 8
更复杂的例子:
x = torch.randn(3, requires_grad=True)
y = torch.sum(x * 2)
y.backward()
print(x.grad)  # tensor([2., 2., 2.])
# Note: gradients accumulate across backward() calls, so reset them manually
x.grad.zero_()
requires_grad=True让PyTorch记录所有对该Tensor的操作,构建计算图。调用.backward()时沿计算图反向传播,为每个requires_grad=True的叶子节点计算梯度,并累加到它的.grad属性中(因此上面需要手动清零)。
3. 构建神经网络(nn.Module)
import torch.nn as nn
import torch.nn.functional as F
class SimpleNet(nn.Module):
    """Fully connected classifier: two hidden layers plus an output layer.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width shared by both hidden layers.
        num_classes: Number of output logits.
        dropout: Drop probability of the dropout layer applied after each
            hidden activation. Defaults to 0.2.
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout=0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        # One dropout module is reused after both hidden layers.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``."""
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        # No activation on the last layer: that is left to the loss function.
        return self.fc3(x)
model = SimpleNet(784, 256, 10)
print(model)
# Parameter count: add up the element count of every parameter tensor
total_params = sum(map(lambda tensor: tensor.numel(), model.parameters()))
print(f"Total parameters: {total_params:,}")
nn.Module是所有网络的基类。只需定义__init__(声明层)和forward(定义前向传播)。反向传播由autograd自动处理。
4. 损失函数与优化器
# Cross-entropy is the usual loss for classification tasks
criterion = nn.CrossEntropyLoss()
# Option 1: SGD with momentum
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)
# Option 2: Adam (this assignment replaces the SGD optimizer above)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
损失函数计算预测值和真实值的差距,优化器根据梯度更新参数。
5. 训练循环
完整的训练流程:
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one training pass over ``dataloader`` and report loss and accuracy.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Iterable yielding ``(data, target)`` batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss. The epoch loss assumes this is the mean over the batch
            (as with a default-constructed ``nn.CrossEntropyLoss``).
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the per-sample mean loss and the
        fraction of samples whose arg-max prediction equals the target.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train()  # training mode (enables Dropout and similar layers)
    loss_sum = 0.0
    correct = 0
    total = 0
    for data, target in dataloader:
        data, target = data.to(device), target.to(device)

        # Forward pass
        output = model(data)
        loss = criterion(output, target)

        # Backward pass: clear old gradients, compute new ones, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics. Weight each batch's mean loss by its size so that a
        # shorter final batch does not count as much as a full one.
        batch_size = target.size(0)
        loss_sum += loss.item() * batch_size
        correct += output.argmax(dim=1).eq(target).sum().item()
        total += batch_size

    return loss_sum / total, correct / total
def evaluate(model, dataloader, criterion, device):
    """Measure loss and accuracy of ``model`` on ``dataloader`` without training.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(data, target)`` batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss. The reported loss assumes this is the mean over the batch
            (as with a default-constructed ``nn.CrossEntropyLoss``).
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the per-sample mean loss and the
        fraction of samples whose arg-max prediction equals the target.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()  # evaluation mode (disables Dropout)
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():  # no gradient tracking, which saves memory
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Weight each batch's mean loss by its size so that a shorter
            # final batch does not count as much as a full one.
            batch_size = target.size(0)
            loss_sum += criterion(output, target).item() * batch_size
            correct += output.argmax(dim=1).eq(target).sum().item()
            total += batch_size

    return loss_sum / total, correct / total
训练三步走:optimizer.zero_grad() -> loss.backward() -> optimizer.step(),这是PyTorch训练的固定模式。
6. MNIST手写数字识别
把上面的组件组装成完整的MNIST项目:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Preprocessing: convert each image to a tensor, then normalise it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),  # MNIST mean and std
])

# Load the datasets
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000)

# Device and loss. The model and its optimizer are created further down,
# once MNISTNet is defined: SimpleNet has no flatten step, so building one
# here would only allocate a network that is replaced before it is used.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
# Model used for training (note: MNIST inputs must be flattened to 784-dim vectors)
class MNISTNet(nn.Module):
    """Fully connected MNIST classifier that flattens its input itself.

    Layers: flatten -> Linear(784, 256) -> ReLU -> dropout
    -> Linear(256, 128) -> ReLU -> Linear(128, 10).

    Args:
        dropout: Drop probability of the dropout layer applied after the
            first hidden activation. Defaults to 0.2.
    """

    def __init__(self, dropout=0.2):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return the raw (un-normalised) scores for the 10 classes."""
        x = self.flatten(x)
        x = self.dropout(F.relu(self.fc1(x)))
        # Dropout is applied after the first hidden layer only.
        x = F.relu(self.fc2(x))
        return self.fc3(x)
model = MNISTNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop: one training pass, then one test-set evaluation, per epoch
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)
    print(f"Epoch {epoch}: "
          f"Train Loss={train_loss:.4f} Acc={train_acc:.4f} | "
          f"Test Loss={test_loss:.4f} Acc={test_acc:.4f}")
10个epoch后测试集准确率通常能达到97%以上。这只是全连接网络,换成CNN(卷积神经网络)可以轻松突破99%。
小结
PyTorch的核心概念链:Tensor -> autograd -> nn.Module -> 损失函数+优化器 -> 训练循环。掌握这条链路就能搭建和训练任意模型。后续可以深入CNN、RNN、Transformer等网络架构。