PyTorch is the preferred framework for AI research and increasingly for production. Its dynamic computation graph and Pythonic API make debugging intuitive.

Installation

  pip install torch torchvision
  
  import torch
# Sanity-check the install: report the library version and whether a CUDA
# device is usable, one item per line.
print(
    f"PyTorch {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)
  

Tensors

  import torch

# Three ways to build a tensor: from Python data, from random samples,
# and pre-filled with zeros.
x = torch.tensor([1.0, 2.0, 3.0])
matrix = torch.randn(3, 4)  # values drawn from a normal distribution
zeros = torch.zeros(2, 3)

# Move the vector onto the GPU when one is present.
if torch.cuda.is_available():
    x = x.to("cuda")

# Matrix product: (2, 3) @ (3, 4) -> (2, 4)
a = torch.randn(2, 3)
b = torch.randn(3, 4)
c = a @ b
print(c.shape)
  

Autograd — Automatic Differentiation

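PyTorch records the operations applied to tensors created with requires_grad=True, so calling backward() on a result computes the gradients automatically: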

  # Differentiate y = x^2 + 3x + 1 at x = 2 using autograd.
x = torch.tensor(2.0, requires_grad=True)  # requires_grad=True: track ops on x
y = x ** 2 + 3 * x + 1
y.backward()  # fills x.grad with dy/dx evaluated at the current x
print(x.grad)  # dy/dx = 2x + 3 = 7.0
  

Build a Neural Network

  import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Fully connected classifier: three linear layers with ReLU and dropout.

    With the default sizes the network maps 784 input features (e.g. a
    flattened 28x28 image) through hidden layers of 128 and 64 units to
    10 output scores.

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores per sample.
        dropout: Dropout probability applied after the first hidden layer.
    """

    def __init__(self, in_features: int = 784, hidden1: int = 128,
                 hidden2: int = 64, num_classes: int = 10,
                 dropout: float = 0.2):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return raw class scores of shape (rows, num_classes).

        ``x`` may have any shape whose elements regroup into rows of
        ``in_features`` values, e.g. (batch, 1, 28, 28) or (batch, 784)
        with the default sizes.
        """
        # reshape (unlike view) also accepts non-contiguous inputs, such as
        # the result of transpose/permute, and only copies when it has to.
        # The row width is read from fc1 so it always matches the first layer.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the raw scores are returned.
        return self.fc3(x)


# Instantiate with the default layer sizes and show the layer summary.
model = Net()
print(model)
  

Training Loop

  from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing: convert each image to a tensor, then normalize it with the
# given (mean,), (std,) — presumably the MNIST training-set statistics; confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Place the model on its device *before* creating the optimizer, so the
# optimizer is built over parameters that already live where training runs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(5):
    model.train()  # training mode: dropout is active
    total_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")
  

Evaluation

  # Measure classification accuracy on the MNIST test split.
model.eval()  # inference mode: dropout is disabled
correct = 0
total = 0

# download=True (as for the training split) fetches the data when ./data is
# missing instead of failing; it does nothing if the files are already there.
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1000)

with torch.no_grad():  # gradients are not needed for evaluation
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        pred = output.argmax(dim=1)  # index of the highest score per sample
        correct += (pred == target).sum().item()
        total += target.size(0)

print(f"Accuracy: {100. * correct / total:.2f}%")
  

Save and Load

  # Persist only the learned parameters (the state dict), then restore them.
torch.save(model.state_dict(), 'model.pth')
# map_location='cpu' lets a checkpoint written from a GPU be read on a machine
# without one; load_state_dict then copies the values into the model's own
# parameters, whichever device they are on.
# NOTE: torch.load unpickles the file — only load checkpoints you trust, or
# pass weights_only=True on PyTorch versions that support it.
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
  

PyTorch’s flexibility and strong research community make it the top choice for cutting-edge AI development.