Get Started
Tensor
PyTorch is similar to NumPy, but PyTorch tensors can be accelerated on a GPU.
import torch
import numpy as np
numpy_tensor = np.random.randn(2,3)
# NumPy ndarray -> PyTorch Tensor
pytorch_tensor1 = torch.Tensor(numpy_tensor)
pytorch_tensor2 = torch.from_numpy(numpy_tensor)
# get the shape of the tensor
pytorch_tensor1.shape
pytorch_tensor1.size()
# get the datatype of the tensor
pytorch_tensor1.type()
# get the dimension of the tensor
pytorch_tensor1.dim()
# get the number of all elements in the tensor
pytorch_tensor1.numel()
# create a matrix whose elements are all 1, with size (2,3)
x = torch.ones(2,3)
# create a matrix with random values
x = torch.randn(4,3)
# get the largest value in each row
max_value, max_idx = torch.max(x, dim=1)
# get sum for each row
sum_x = torch.sum(x,dim=1)
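Because tensors can run on the GPU, moving data between devices is a one-line operation. Here is a minimal sketch (it assumes a CUDA-capable machine; on a CPU-only machine the `if` branch is simply skipped):

```python
import torch

x = torch.randn(2, 3)
if torch.cuda.is_available():
    x = x.cuda()   # move the tensor onto the GPU
    y = x * 2      # this multiplication now runs on the GPU
    y = y.cpu()    # move the result back to the CPU
```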
Variable
A Variable is a wrapper around a tensor. A Variable has three attributes:
- `.data`: the wrapped tensor
- `.grad`: the gradient computed during backpropagation
- `.grad_fn`: the function that created this Variable
from torch.autograd import Variable
x_tensor = torch.randn(10,5)
y_tensor = torch.randn(10,5)
# tensor -> Variable
x = Variable(x_tensor, requires_grad=True)  # tell autograd to compute gradients for x
y = Variable(y_tensor, requires_grad=True)
z = torch.sum(x+y)
print(z.data)
print(z.grad_fn)
z.backward()
print(x.grad)
print(y.grad)
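Since `z = torch.sum(x + y)`, the partial derivative of z with respect to every element of x and y is 1, so both `x.grad` and `y.grad` print (10, 5) matrices of ones.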
Automatic Differentiation
import torch
from torch.autograd import Variable
x = Variable(torch.Tensor([2]), requires_grad=True)
y = x + 2
z = y**2 + 3
z.backward()
print(x.grad)
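Here `y = x + 2` and `z = y**2 + 3`, so dz/dx = 2(x + 2), which is 8 at x = 2; `x.grad` therefore prints 8.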
x = Variable(torch.randn(10,20), requires_grad=True)
y = Variable(torch.randn(10,5), requires_grad=True)
w = Variable(torch.randn(20,5), requires_grad=True)
# torch.matmul is matrix multiplication
# torch.mean computes the average value
out = torch.mean(y - torch.matmul(x,w))
out.backward()
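After `out.backward()`, each Variable's `.grad` has the same shape as the Variable itself, e.g.:

```python
print(x.grad.shape)  # torch.Size([10, 20])
print(w.grad.shape)  # torch.Size([20, 5])
```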
Linear Model & Gradient Descent
By updating w and b in the direction opposite to the gradient, we move toward the minimum of the loss, eventually reaching the w and b with the smallest loss.
The learning rate is the step size ("stride") of each update. A learning rate that is too large may prevent convergence; one that is too small wastes time.
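Concretely, with learning rate $\eta$, each step updates the parameters against the gradient of the loss $L$:

$$w \leftarrow w - \eta \frac{\partial L}{\partial w}, \qquad b \leftarrow b - \eta \frac{\partial L}{\partial b}$$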
import torch
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
[9.779], [6.182], [7.59], [2.167], [7.042],
[10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
[3.366], [2.596], [2.53], [1.221], [2.827],
[3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
plt.plot(x_train, y_train,'bo')
x_train = torch.Tensor(x_train)
y_train = torch.Tensor(y_train)
w = Variable(torch.randn(1), requires_grad=True)
b = Variable(torch.randn(1), requires_grad=True)
x_train = Variable(x_train)
y_train = Variable(y_train)
def linear_model(x):
    return x * w + b
y_ = linear_model(x_train)
plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label='real')
plt.plot(x_train.data.numpy(), y_.data.numpy(), 'ro', label='estimated')
plt.legend()
![ori](/img/pytorch/linear1.png)
![new](/img/pytorch/linear2.png)
The left image shows the original data. The right image shows the fit after updating w and b only once.
def get_loss(y_, y_train):
    return torch.mean((y_ - y_train) ** 2)
loss = get_loss(y_, y_train)
loss.backward()
w.data = w.data - 1e-2 * w.grad.data
b.data = b.data - 1e-2 * b.grad.data
for i in range(10):
    y_ = linear_model(x_train)
    loss = get_loss(y_, y_train)
    w.grad.zero_()
    b.grad.zero_()
    loss.backward()
    w.data = w.data - 1e-2 * w.grad.data
    b.data = b.data - 1e-2 * b.grad.data
    print('epoch: {}, loss: {}'.format(i, loss.data[0]))
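Note that the gradients are zeroed with `w.grad.zero_()` and `b.grad.zero_()` before each `backward()` call: PyTorch accumulates gradients, so without zeroing them every update would use the sum of all previous gradients.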
y_ = linear_model(x_train)
plt.plot(x_train.data.numpy(), y_train.data.numpy(), 'bo', label='real')
plt.plot(x_train.data.numpy(), y_.data.numpy(), 'ro', label='estimated')
plt.legend()
plt.show()
![result](/img/pytorch/linear3.png)
We can see that the fitted line in the final result completes the linear regression.
Below is a quick reference for the format strings accepted by `plot(X, Y, S)`, where S is a character string combining one element from any or all of the following three columns:

| Color | Marker | Line style |
|---|---|---|
| b blue | . point | - solid |
| g green | o circle | : dotted |
| r red | x x-mark | -. dashdot |
| c cyan | + plus | -- dashed |
| m magenta | * star | (none) no line |
| y yellow | s square | |
| k black | d diamond | |
| w white | v triangle (down) | |
| | ^ triangle (up) | |
| | < triangle (left) | |
| | > triangle (right) | |
| | p pentagram | |
| | h hexagram | |
Parameter Initialization
import numpy as np
import torch
from torch import nn
class sim_net(nn.Module):
    def __init__(self):
        super(sim_net, self).__init__()
        self.l1 = nn.Sequential(
            nn.Linear(30, 40),
            nn.ReLU()
        )
        self.l1[0].weight.data = torch.randn(40, 30)  # initialize a single layer by hand
        self.l2 = nn.Sequential(
            nn.Linear(40, 50),
            nn.ReLU()
        )
        self.l3 = nn.Sequential(
            nn.Linear(50, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        return x
net2 = sim_net()
# children() yields only the direct submodules
for i in net2.children():
    print(i)
# modules() yields the module itself and all submodules recursively
for i in net2.modules():
    print(i)
for layer in net2.modules():
    if isinstance(layer, nn.Linear):
        param_shape = layer.weight.shape
        # np.random.normal returns float64, so cast back to float32
        layer.weight.data = torch.from_numpy(np.random.normal(0, 0.5, size=param_shape)).float()
`torch.nn.init` provides many built-in initialization schemes:
from torch.nn import init
print(net2.l1[0].weight)
init.xavier_uniform(net2.l1[0].weight)
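The same idea extends to initializing every `nn.Linear` layer in the network with a built-in scheme, e.g. (a minimal sketch reusing `net2` from above):

```python
for layer in net2.modules():
    if isinstance(layer, nn.Linear):
        init.xavier_uniform(layer.weight)  # Xavier/Glorot uniform initialization
```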
Batch/Dataloader
`DataLoader` is the tool for batching data. First we convert the data from a NumPy array (or another format) to a `Tensor`, then wrap it in a `DataLoader`, which lets us iterate over the data efficiently.
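For example, a couple of tensors can be wrapped into a dataset and iterated in batches (a minimal sketch with made-up data):

```python
import torch
import torch.utils.data as Data

x = torch.randn(100, 3)   # 100 samples with 3 features each
y = torch.randn(100, 1)   # 100 targets
dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset, batch_size=10, shuffle=True)
for batch_x, batch_y in loader:
    print(batch_x.shape)  # torch.Size([10, 3])
    break
```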
MNIST
Each MNIST image has shape [1, 28, 28].
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
import numpy as np
torch.manual_seed(1)
EPOCH = 5
BATCH_SIZE = 50
LR = 0.001
DOWNLOAD_MNIST = False  # set to True the first time you run this, to download the dataset
train_data = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)
test_data = torchvision.datasets.MNIST(root = './mnist', train=False)
train_loader = Data.DataLoader(dataset = train_data, batch_size=BATCH_SIZE, shuffle=True)
# shape from (2000, 28, 28) to (2000, 1, 28, 28), values scaled into [0, 1]
test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1), volatile=True).type(torch.FloatTensor)[:2000]/255.
test_y = test_data.test_labels[:2000]
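`volatile=True` marks the test tensor as inference-only so autograd does not track it (in later PyTorch versions this flag was replaced by the `torch.no_grad()` context manager).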
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.out = nn.Linear(32*7*7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten to (batch_size, 32*7*7)
        output = self.out(x)
        return output
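The input to `self.out` is 32*7*7 because each 2x2 max-pool halves the spatial size: 28 -> 14 after `conv1` and 14 -> 7 after `conv2`, with 32 output channels.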
cnn = CNN()
optimizer = torch.optim.Adam(cnn.parameters(),lr = LR)
loss_func = nn.CrossEntropyLoss()
losses = []
acces = []
for epoch in range(EPOCH):
    train_loss = 0
    train_acc = 0
    for count, (x, y) in enumerate(train_loader):
        b_x = Variable(x)
        b_y = Variable(y)
        output = cnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.data[0]
        pred = torch.max(output, 1)[1]
        num_correct = (pred == b_y).sum().data[0]
        acc = num_correct / b_x.shape[0]
        train_acc += acc
    losses.append(train_loss / len(train_loader))
    acces.append(train_acc / len(train_loader))
    print('EPOCH:', epoch, ', train loss:', train_loss / len(train_loader), ', train acc:', train_acc / len(train_loader))
plt.title('train acc')
plt.plot(np.arange(len(acces)), acces)
plt.show()
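The `test_x` and `test_y` prepared earlier can be used to check accuracy on held-out data; a minimal sketch in the same old-style API:

```python
test_output = cnn(test_x)
pred_y = torch.max(test_output, 1)[1].data
accuracy = (pred_y == test_y).sum() / float(test_y.size(0))
print('test accuracy:', accuracy)
```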
CIFAR10
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
from torchvision.datasets import CIFAR10
torch.manual_seed(1)
def data_tf(x):
    x = np.array(x, dtype='float32') / 255  # scale pixels to [0, 1]
    x = (x - 0.5) / 0.5                     # normalize to [-1, 1]
    x = x.transpose((2, 0, 1))              # HWC -> CHW
    x = torch.from_numpy(x)
    return x
train_set = CIFAR10('./data', train=True, transform=data_tf)
train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_set = CIFAR10('./data', train=False, transform=data_tf)
test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=1,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv5 = nn.Sequential(
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(512, 100),
            nn.ReLU(),
            nn.Linear(100, 10)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x
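The first `nn.Linear(512, 100)` works because a 32x32 CIFAR10 image is halved by each of the five max-pools (32 -> 16 -> 8 -> 4 -> 2 -> 1), leaving a 512x1x1 feature map, i.e. 512 values after flattening.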
net = VGG()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-1)
criterion = nn.CrossEntropyLoss()
def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
    # valid_data is accepted but not used in this simplified loop
    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        for im, label in train_data:
            im = Variable(im)
            label = Variable(label)
            output = net(im)
            loss = criterion(output, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.data[0]
            pred = torch.max(output, 1)[1]
            num_correct = (pred == label).sum().data[0]
            acc = num_correct / im.shape[0]
            train_acc += acc
        print('EPOCH:', epoch, ', train loss:', train_loss / len(train_data), ', train acc:', train_acc / len(train_data))

train(net, train_data, test_data, 10, optimizer, criterion)