Architectures

Autoencoder

An autoencoder learns to compress its input into a small latent vector (the encoder) and to reconstruct the original input from that vector (the decoder). Because it is trained to minimize reconstruction error, the latent vector becomes a compact representation of the input, useful for dimensionality reduction, denoising, and pretraining. Figure from [1].

_images/autoencoder.png

Model

An example implementation in PyTorch.

import torch
import torch.nn as nn

class Autoencoder(nn.Module):
    def __init__(self, in_shape):
        super().__init__()
        c,h,w = in_shape
        self.encoder = nn.Sequential(
            nn.Linear(c*h*w, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 12),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(12, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, c*h*w),
            nn.Sigmoid()
        )

    def forward(self, x):
        bs,c,h,w = x.size()
        x = x.view(bs, -1)
        x = self.encoder(x)
        x = self.decoder(x)
        x = x.view(bs, c, h, w)
        return x

Training

def train(net, loader, loss_func, optimizer):
    net.train()
    for inputs, _ in loader:
        output = net(inputs)
        loss = loss_func(output, inputs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
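A minimal usage sketch, assuming a DataLoader named loader that yields (inputs, labels) batches of 1x28x28 images with pixel values in [0, 1] (the Sigmoid output layer assumes this range):

model = Autoencoder(in_shape=(1, 28, 28))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
train(model, loader, nn.MSELoss(), optimizer)  # loader: an assumed DataLoader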

Further reading

CNN

A Convolutional Neural Network (CNN) applies learned filters across the spatial dimensions of its input, sharing weights between positions. Stacked convolution and pooling layers build up increasingly abstract features, which makes CNNs the standard architecture for image classification and other vision tasks. Figure from [2].

_images/cnn.jpeg

Model

An example implementation in PyTorch.
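A minimal sketch of a small classifier for 32x32 RGB images (e.g. CIFAR-10); the layer sizes here are illustrative assumptions, not a reference architecture:

import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self, n_classes=10):
        super().__init__()
        # two conv blocks, each followed by 2x2 max pooling
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, n_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # 3 x 32 x 32 -> 32 x 16 x 16
        x = self.pool(F.relu(self.conv2(x)))  # -> 64 x 8 x 8
        x = x.view(x.size(0), -1)             # flatten
        x = F.relu(self.fc1(x))
        return self.fc2(x)                    # raw class scores (logits)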

Training
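A sketch of a standard supervised training loop, assuming a loader of (images, labels) batches and a classification loss such as nn.CrossEntropyLoss:

def train(net, loader, loss_func, optimizer):
    net.train()
    for inputs, targets in loader:
        output = net(inputs)
        loss = loss_func(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()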

Further reading

GAN

A Generative Adversarial Network (GAN) is a type of network which creates novel tensors (often images, voices, etc.). The generator competes with the discriminator in a zero-sum game: the generator tries to produce novel tensors that the discriminator cannot distinguish from real ones, while the discriminator tries to classify each tensor as real or fake. The generator has succeeded when the discriminator assigns its outputs a 50% chance of being real and a 50% chance of being fake.

Figure from [3].

_images/gan.png

Model

An example implementation in PyTorch.

Generator

import torch.nn as nn

class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            # input: latent vector, 200 x 1 x 1
            nn.ConvTranspose2d(200, 32 * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(32 * 8),
            nn.ReLU(),
            # state size. (32*8) x 4 x 4
            nn.ConvTranspose2d(32 * 8, 32 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32 * 4),
            nn.ReLU(),
            # state size. (32*4) x 8 x 8
            nn.ConvTranspose2d(32 * 4, 32 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32 * 2),
            nn.ReLU(),
            # state size. (32*2) x 16 x 16
            nn.ConvTranspose2d(32 * 2, 32, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            # state size. 32 x 32 x 32
            nn.ConvTranspose2d(32, 1, 4, 2, 1, bias=False),
            nn.Tanh()
            # output: 1 x 64 x 64
        )
    def forward(self, tens):
        return self.net(tens)

Discriminator

class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            # input: 1 x 64 x 64 image
            nn.Conv2d(1, 32, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2),
            # state size. 32 x 32 x 32
            nn.Conv2d(32, 32 * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32 * 2),
            nn.LeakyReLU(0.2),
            # state size. (32*2) x 16 x 16
            nn.Conv2d(32 * 2, 32 * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32 * 4),
            nn.LeakyReLU(0.2),
            # state size. (32*4) x 8 x 8
            nn.Conv2d(32 * 4, 32 * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(32 * 8),
            nn.LeakyReLU(0.2),
            # state size. (32*8) x 4 x 4
            nn.Conv2d(32 * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
            # output: scalar probability that the input is real
        )

    def forward(self, tens):
        return self.net(tens)
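A quick shape sanity check (a sketch): a batch of latent vectors maps to 64x64 images, which the discriminator scores as probabilities:

import torch

netG, netD = Generator(), Discriminator()
noise = torch.randn(16, 200, 1, 1)   # batch of 16 latent vectors
fake = netG(noise)                   # -> (16, 1, 64, 64)
score = netD(fake).view(-1)          # -> (16,) values in [0, 1]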

Training

def train(netD, netG, loader, loss_func, optimizerD, optimizerG, num_epochs):
    netD.train()
    netG.train()
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    for epoch in range(num_epochs):
        for i, data in enumerate(loader, 0):
            # (1) update the discriminator on a batch of real images
            netD.zero_grad()
            realtens = data[0].to(device)
            b_size = realtens.size(0)
            label = torch.full((b_size,), 1, dtype=torch.float, device=device)  # real labels
            output = netD(realtens).view(-1)
            errD_real = loss_func(output, label)
            errD_real.backward()  # gradients from the real batch
            # (2) update the discriminator on a batch of generated images
            noise = torch.randn(b_size, 200, 1, 1, device=device)
            fake = netG(noise)
            label.fill_(0)  # fake labels
            output = netD(fake.detach()).view(-1)  # detach: don't backprop into the generator
            errD_fake = loss_func(output, label)
            errD_fake.backward()  # gradients from the fake batch
            errD = errD_real + errD_fake  # total discriminator error (for logging)
            optimizerD.step()
            # (3) update the generator: try to make the discriminator output "real"
            netG.zero_grad()
            label.fill_(1)
            output = netD(fake).view(-1)
            errG = loss_func(output, label)  # generator error
            errG.backward()
            optimizerG.step()
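After training, novel images can be sampled by feeding fresh noise through the generator (a sketch; 200 matches the latent size used above, and netG is assumed to already live on device):

netG.eval()
with torch.no_grad():
    samples = netG(torch.randn(64, 200, 1, 1, device=device))  # -> (64, 1, 64, 64)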

Further reading

MLP

A Multilayer Perceptron (MLP) is a neural network built only from fully connected layers. Figure from [5].

_images/mlp.jpg

Model

An example implementation on the FashionMNIST dataset in PyTorch.

  1. The input to the network is a vector of size 28*28, i.e. a 28x28-pixel image from the FashionMNIST dataset flattened to a single-dimension vector.
  2. 3 fully connected hidden layers.
  3. Output layer with 10 outputs (10 classes).
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        # define layers
        self.fc1 = nn.Linear(in_features=28*28, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=200)
        self.fc3 = nn.Linear(in_features=200, out_features=100)
        self.out = nn.Linear(in_features=100, out_features=10)

    def forward(self, t):
        # fc1: flatten the input to one dimension per example
        t = t.view(-1, 28*28)
        t = self.fc1(t)
        t = F.relu(t)
        # fc2
        t = self.fc2(t)
        t = F.relu(t)
        # fc3
        t = self.fc3(t)
        t = F.relu(t)
        # output
        t = self.out(t)
        return t

Training

def train(net, loader, loss_func, optimizer):
    net.train()
    n_batches = len(loader)
    running_loss = 0.0
    for inputs, targets in loader:
        output = net(inputs)
        loss = loss_func(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # print statistics: average loss per batch over the epoch
    print('Training loss: %.3f' % (running_loss / n_batches))

Evaluating

import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

def main():
    train_set = torchvision.datasets.FashionMNIST(
        root='./FMNIST',
        train=True,
        download=True,  # fetch the dataset on first run
        transform=transforms.Compose([
            transforms.ToTensor()
        ])
    )
    mlp = MLP()
    loader = torch.utils.data.DataLoader(train_set, batch_size=1000)
    optimizer = optim.Adam(mlp.parameters(), lr=0.01)
    loss_func = nn.CrossEntropyLoss()
    for epoch in range(15):
        train(mlp, loader, loss_func, optimizer)
    print("Finished Training")
    torch.save(mlp.state_dict(), "./mlpmodel.pt")
    test_set = torchvision.datasets.FashionMNIST(
        root='./FMNIST',
        train=False,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor()
        ])
    )
    testloader = torch.utils.data.DataLoader(test_set, batch_size=4)
    mlp.eval()  # switch to evaluation mode before testing
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = mlp(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))

Further reading

TODO

RNN

A Recurrent Neural Network (RNN) processes a sequence one element at a time, passing a hidden state from each step to the next so that earlier elements can influence later predictions. This makes RNNs a natural fit for variable-length inputs such as text or time series.

_images/rnn.png

Model

import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        # 185 = input size (57 one-hot characters) + hidden size (128)
        self.hid_fc = nn.Linear(185, 128)
        self.out_fc = nn.Linear(185, n_classes)
        self.softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, inputs, hidden):
        inputs = inputs.view(1,-1)
        combined = torch.cat([inputs, hidden], dim=1)
        hid_out = self.hid_fc(combined)
        out = self.out_fc(combined)
        out = self.softmax(out)
        return out, hid_out

Training

In this example, our input is a list of last names, where each name is a variable-length array of one-hot encoded characters. Our target is a list of indices representing the class (language) of the name.

  1. For each input name:
  2. Initialize the hidden vector
  3. Loop through the characters and predict the class
  4. Pass the final character’s prediction to the loss function
  5. Backprop and update the weights
criterion = nn.NLLLoss()  # assumed loss; pairs with the model's LogSoftmax output

def train(model, inputs, targets):
    for i in range(len(inputs)):
        target = targets[i]
        name = inputs[i]
        hidden = torch.zeros(1, 128)
        model.zero_grad()

        for char in name:
            input_ = torch.FloatTensor(char)
            pred, hidden = model(input_, hidden)

        loss = criterion(pred, target)
        loss.backward()

        # manual SGD update: w <- w - lr * grad, with lr = 0.001
        for p in model.parameters():
            p.data.add_(p.grad.data, alpha=-0.001)
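A minimal sketch of the assumed preprocessing: each character becomes a one-hot vector over a fixed alphabet. An alphabet of 57 symbols is consistent with the model above, since 57 input features + 128 hidden features = 185, the in_features of its linear layers:

import string

alphabet = string.ascii_letters + " .,;'"  # 57 symbols: 52 letters + 5 punctuation

def name_to_onehot(name):
    # one one-hot vector (length 57) per character in the name
    return [[1.0 if c == ch else 0.0 for c in alphabet] for ch in name]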

Further reading

VAE

Autoencoders can encode an input image to a latent vector and decode it, but they can't generate novel images. Variational Autoencoders (VAE) solve this problem by adding a constraint: the latent vector representation should model a unit Gaussian distribution. The encoder returns the mean and (log) variance of the learned Gaussian. To generate a new image, we sample a latent vector from that Gaussian and pass it to the decoder. This constraint also improves network generalization and avoids memorization. Figure from [4].

_images/vae.png

Loss Function

The VAE loss function combines reconstruction loss (e.g. Cross Entropy, MSE) with KL divergence.
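For a diagonal Gaussian posterior with a given mean and log-variance logvar, the KL divergence from the unit Gaussian prior has a closed form, summed over the latent dimensions and averaged over the batch:

    KL = 0.5 * sum(exp(logvar) + mean^2 - 1 - logvar)

The snippet below implements this directly.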

def vae_loss(output, input, mean, logvar, loss_func):
    recon_loss = loss_func(output, input)
    # closed-form KL divergence between N(mean, exp(logvar)) and N(0, 1)
    kl_loss = torch.mean(0.5 * torch.sum(
        torch.exp(logvar) + mean**2 - 1. - logvar, 1))
    return recon_loss + kl_loss

Model

An example implementation in PyTorch of a Convolutional Variational Autoencoder.
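The decoder below ends with a CenterCrop layer that trims the transposed-convolution output (33x33 for a 32x32 input) back to the original size. That layer is not a built-in nn module; a minimal sketch of one, along with the imports the snippet needs:

import torch
import torch.nn as nn

class CenterCrop(nn.Module):
    def __init__(self, h, w):
        super().__init__()
        self.h, self.w = h, w

    def forward(self, x):
        # crop a (N, C, H, W) tensor to (N, C, h, w) around its center
        _, _, H, W = x.size()
        top = (H - self.h) // 2
        left = (W - self.w) // 2
        return x[:, :, top:top + self.h, left:left + self.w]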

class VAE(nn.Module):
    def __init__(self, in_shape, n_latent):
        super().__init__()
        self.in_shape = in_shape
        self.n_latent = n_latent
        c,h,w = in_shape
        self.z_dim = h // 2**2  # spatial size after two stride-2 convs (h/4)
        self.encoder = nn.Sequential(
            nn.BatchNorm2d(c),
            nn.Conv2d(c, 32, kernel_size=4, stride=2, padding=1),  # 32, h/2, w/2
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1),  # 64, h/4, w/4
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
        )
        )
        self.z_mean = nn.Linear(64 * self.z_dim**2, n_latent)
        self.z_var = nn.Linear(64 * self.z_dim**2, n_latent)
        self.z_develop = nn.Linear(n_latent, 64 * self.z_dim**2)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=0),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1),
            CenterCrop(h,w),
            nn.Sigmoid()
        )

    def sample_z(self, mean, logvar):
        # reparameterization trick: z = mean + stddev * eps, eps ~ N(0, 1)
        stddev = torch.exp(0.5 * logvar)
        noise = torch.randn_like(stddev)
        return (noise * stddev) + mean

    def encode(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)
        mean = self.z_mean(x)
        logvar = self.z_var(x)  # trained to represent log-variance
        return mean, logvar

    def decode(self, z):
        out = self.z_develop(z)
        out = out.view(z.size(0), 64, self.z_dim, self.z_dim)
        out = self.decoder(out)
        return out

    def forward(self, x):
        mean, logvar = self.encode(x)
        z = self.sample_z(mean, logvar)
        out = self.decode(z)
        return out, mean, logvar

Training

def train(model, loader, loss_func, optimizer):
    model.train()
    for inputs, _ in loader:
        output, mean, logvar = model(inputs)
        loss = vae_loss(output, inputs, mean, logvar, loss_func)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
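Because training pushes the latent space toward the unit Gaussian, novel images can be generated by decoding latent vectors drawn from that prior (a sketch):

model.eval()
with torch.no_grad():
    z = torch.randn(16, model.n_latent)  # sample from the prior N(0, 1)
    images = model.decode(z)             # -> (16, 1, h, w)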

Further reading

References

[1] https://hackernoon.com/autoencoders-deep-learning-bits-1-11731e200694
[2] http://cs231n.github.io/convolutional-networks
[3] http://guertl.me/post/162759264070/generative-adversarial-networks
[4] http://kvfrans.com/variational-autoencoders-explained