30  PyTorch

30.1 Linear Regression

An introduction to using tensors and gradients

import numpy as np
import torch

30.1.1 Synthetic Data

x = np.random.randn(200, 2).astype('float32')

Note that y is a column vector:

y = (12 - 0.8 * x[:, 0] + 1.2 * x[:, 1]).astype('float32').reshape(-1, 1)
x = torch.as_tensor(x)
x.shape
torch.Size([200, 2])
y = torch.as_tensor(y)
y.shape
torch.Size([200, 1])
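torch.as_tensor avoids a copy when the NumPy array already has a compatible dtype, so the tensors above share memory with the original arrays. A quick sketch to confirm this on a throwaway array:

a = np.zeros(3, dtype='float32')
t = torch.as_tensor(a)  # shares memory with a; no copy is made for float32
a[0] = 1.0
print(t)                # tensor([1., 0., 0.]) - the change is visible through the tensor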

30.1.2 Initialize bias and weights

b = torch.tensor(0., requires_grad=True)
w = torch.randn(2, 1, requires_grad=True)
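requires_grad=True tells autograd to track operations on these tensors so that loss.backward() below can fill in their .grad attributes. A minimal sketch of the mechanism on a throwaway scalar:

t = torch.tensor(3., requires_grad=True)
f = t ** 2       # f = t^2
f.backward()     # computes df/dt = 2 * t
print(t.grad)    # tensor(6.)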

30.1.3 Train

def model(x):
    return x @ w + b

def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

lr = 0.5
niter = 100
for i in range(niter):
    # Forward pass and loss
    y_hat = model(x)
    loss = mse(y, y_hat)

    # Backward pass: populates w.grad and b.grad
    loss.backward()

    # Gradient descent step; no_grad() so the update itself is not tracked
    with torch.no_grad():
        w -= w.grad * lr
        b -= b.grad * lr
        # Zero the gradients so they do not accumulate across iterations
        w.grad.zero_()
        b.grad.zero_()
print(w)
tensor([[-0.8000],
        [ 1.2000]], requires_grad=True)
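The learned weights are close to the generating coefficients (-0.8 and 1.2). As a quick check, the bias and the final training loss can be inspected with the helpers defined above; the bias should land near the intercept of 12:

with torch.no_grad():
    print(b)
    print(mse(y, model(x)))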

30.2 A simple network

30.2.1 Synthetic Data

# Optionally set a seed here for reproducibility, e.g. np.random.seed(...)
x = np.random.rand(500, 10)
w = np.random.rand(10)
y = x @ w + np.random.rand(500)/10
res = np.random.choice(range(500), 400, replace=False)
x_train = x[res]
y_train = y[res]
x_test = np.delete(x, res, axis=0)
y_test = np.delete(y, res, axis=0)
x_train.shape, x_test.shape
((400, 10), (100, 10))
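The same 400/100 split can also be produced with scikit-learn, shown here as a sketch (it assumes scikit-learn is installed; the random_state value is arbitrary):

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=100, random_state=0)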

30.2.2 Network

import torch
import torch.nn as nn
import torch.nn.functional as F
x_tr = torch.from_numpy(x_train).float()
y_tr = torch.from_numpy(y_train).float().unsqueeze(1)  # column vector to match the model output
x_te = torch.from_numpy(x_test).float()
y_te = torch.from_numpy(y_test).float().unsqueeze(1)
x_tr.size(), y_tr.size()
(torch.Size([400, 10]), torch.Size([400, 1]))
x_tr.dtype, y_tr.dtype
(torch.float32, torch.float32)

30.2.3 Train

# D_in is the input dimension; H is the hidden dimension;
# D_out is the output dimension. Training below uses the full
# training set at each iteration rather than mini-batches.
D_in, H, D_out = 10, 10, 1

# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
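The same architecture can also be written as an nn.Module subclass, which becomes convenient once the forward pass needs more than a linear chain of layers. A sketch (the class name is arbitrary):

class TwoLayerNet(nn.Module):
    def __init__(self, d_in, h, d_out):
        super().__init__()
        self.fc1 = nn.Linear(d_in, h)
        self.fc2 = nn.Linear(h, d_out)

    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x)))

# model = TwoLayerNet(D_in, H, D_out)  # drop-in replacement for the Sequential model above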
loss_fn = torch.nn.MSELoss(reduction='sum')

# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use Adam; the optim package contains many other
# optimization algorithms. The first argument to the Adam constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-2
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model(x_tr)

    # Compute and print loss.
    loss = loss_fn(y_pred, y_tr)
    if t % 100 == 99:
        print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (the learnable weights of
    # the model). This is because, by default, gradients are accumulated in
    # buffers (i.e., not overwritten) whenever .backward() is called.
    # Check out the docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()
With the target reshaped to a column vector, input and target sizes match, so no broadcasting warning is emitted and the printed loss decreases steadily over the 500 iterations.
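The held-out test set created earlier is not used above; a minimal sketch of estimating generalization error with it:

with torch.no_grad():
    test_loss = loss_fn(model(x_te), y_te)
print(test_loss.item())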

30.3 Resources