The evolution of NLP modelling paradigms:
- Specific task helpers: n-gram models
- Task-agnostic feature learners: word2vec, context modelling
- Transfer learning for NLP: ELMo, GPT, BERT (pre-train, fine-tune)
- General language models: GPT-4, Llama (emergent abilities)
[Figure: a three-layer MLP. Inputs \(x_1, x_2, \dots, x_n\) pass through the weight/bias pairs \((W_1, b_1)\), \((W_2, b_2)\), \((W_3, b_3)\).]
An MLP in PyTorch, written as a subclass of torch.nn.Module:
import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, x_dim, h1_dim, h2_dim, out_dim):
        super().__init__()
        # Three affine layers: input -> hidden 1 -> hidden 2 -> output
        self.w1 = nn.Linear(x_dim, h1_dim, bias=True)
        self.w2 = nn.Linear(h1_dim, h2_dim, bias=True)
        self.w3 = nn.Linear(h2_dim, out_dim, bias=True)

    def forward(self, x):
        out = torch.relu(self.w1(x))
        out = torch.relu(self.w2(out))
        # No activation on the output layer: return raw scores/logits
        return self.w3(out)
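A quick sanity check of the module; the dimensions below are illustrative assumptions, not values from the slides:

mlp = MLP(x_dim=20, h1_dim=64, h2_dim=32, out_dim=2)
logits = mlp(torch.randn(8, 20))   # batch of 8 inputs -> shape (8, 2)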
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        # Two conv+pool stages; the 16*5*5 flatten assumes a 32x32 RGB input
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 10)
        self.fc2 = nn.Linear(10, 4)
        self.fc3 = nn.Linear(4, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)   # flatten the feature maps
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)           # raw logits for two classes
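Because of the hard-coded 16*5*5 flatten, the network only accepts 32x32 inputs (CIFAR-sized, an assumption consistent with the layer shapes):

import torch
cnn = CNN()
logits = cnn(torch.randn(4, 3, 32, 32))   # batch of 4 RGB images -> shape (4, 2)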
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

class RNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_class):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,
                                      embed_dim,
                                      padding_idx=0)
        self.rnn = nn.RNN(embed_dim,
                          hidden_dim,
                          batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_class)

    def forward(self, x, length):
        x = self.embedding(x)
        # Pack the padded batch so the RNN skips padding positions
        x = pack_padded_sequence(x,
                                 lengths=length,
                                 enforce_sorted=False,
                                 batch_first=True)
        _, hidden = self.rnn(x)       # hidden: (num_layers, batch, hidden_dim)
        return self.fc(hidden[-1])    # classify from the final hidden state
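A toy batch of padded sequences (all sizes and lengths here are illustrative assumptions):

import torch
model = RNN(vocab_size=1000, embed_dim=32, hidden_dim=64, num_class=2)
x = torch.randint(1, 1000, (4, 12))             # 4 sequences, padded to length 12
logits = model(x, torch.tensor([12, 9, 7, 5]))  # true lengths -> shape (4, 2)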
import torch.nn as nn

class TRANSFORMER(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim,
                 nhead=4, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,
                                      embed_dim,
                                      padding_idx=0)
        self.transformer = nn.Transformer(d_model=embed_dim,
                                          nhead=nhead,
                                          num_encoder_layers=num_layers,
                                          num_decoder_layers=num_layers,
                                          dim_feedforward=hidden_dim)
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, src, tgt):
        # nn.Transformer needs both a source and a target sequence
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        out = self.transformer(src, tgt)   # (tgt_len, batch, embed_dim)
        return self.fc(out[-1])            # next-token logits over the vocabulary
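A shape check with assumed toy dimensions (note nn.Transformer defaults to sequence-first tensors):

import torch
model = TRANSFORMER(vocab_size=1000, embed_dim=64, hidden_dim=128)
src = torch.randint(1, 1000, (10, 4))   # (src_len, batch)
tgt = torch.randint(1, 1000, (7, 4))    # (tgt_len, batch)
logits = model(src, tgt)                # -> (4, 1000)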
[Figure: Transformer architecture. Encoder block: Multi-Head Attention -> Add&Norm -> Feed-Forward NN -> Add&Norm. Decoder block: Masked Multi-Head Attention -> Add&Norm -> Multi-Head Cross-Attention -> Add&Norm -> Feed-Forward NN -> Add&Norm.]
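Every attention block in the figure is built on scaled dot-product attention, \(\mathrm{softmax}(QK^\top/\sqrt{d_k})\,V\). A minimal sketch:

import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q, k, v):
    # Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k)) V
    d_k = q.size(-1)
    scores = q @ k.transpose(-2, -1) / d_k ** 0.5
    return F.softmax(scores, dim=-1) @ v

q = k = v = torch.randn(4, 10, 64)            # (batch, seq_len, d_k): self-attention
out = scaled_dot_product_attention(q, k, v)   # -> (4, 10, 64)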
# The core Hugging Face libraries:
import datasets       # load and share datasets
import tokenizers     # fast tokenizers: tokenizer.encode() / tokenizer.decode()
import transformers   # pretrained models
import evaluate       # metrics
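A minimal round-trip through a pretrained tokenizer; the checkpoint name is an illustrative assumption:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed checkpoint
ids = tokenizer.encode("Hello world!")   # text -> token ids
print(tokenizer.decode(ids))             # token ids -> "[CLS] hello world! [SEP]"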
The Hugging Face Hub hosts (approximate counts):
- Datasets: 150k
- Models: 350k
- Spaces: 170k
# The broader ecosystem for efficient training and inference:
import datasets
import tokenizers
import transformers
import evaluate
from accelerate import ...   # distributed / mixed-precision training
from optimum import ...      # hardware-specific optimization
from peft import ...         # parameter-efficient fine-tuning (e.g. LoRA)
import bitsandbytes          # 8-bit / 4-bit quantization
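A hedged sketch of how peft composes with transformers for parameter-efficient fine-tuning; the checkpoint name and LoRA hyperparameters are illustrative assumptions:

from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")  # assumed checkpoint
config = LoraConfig(r=8, lora_alpha=16, lora_dropout=0.05,         # assumed values
                    task_type="CAUSAL_LM")
model = get_peft_model(model, config)   # freeze the base model, train small adapters
model.print_trainable_parameters()      # typically well under 1% of weights train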
In the pre-train/fine-tune paradigm, each task gets its own specialised model:
- Input text -> predict the class/sentiment
- Input text -> summarize
- Question + input text -> answer
With a general language model, one model handles all of these through prompting:
- Prompt + input text -> output response conditioned on the prompt
- Example prompts: predict sentiment, summarize, fill in the blank, generate a story
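The prompting interface can be sketched with the transformers pipeline; gpt2 is an assumed stand-in here and illustrates only the interface, not the instruction-following quality of a modern model:

from transformers import pipeline

generator = pipeline("text-generation", model="gpt2")   # assumed checkpoint
prompt = "Review: 'I loved this movie.' Sentiment:"
out = generator(prompt, max_new_tokens=5)
print(out[0]["generated_text"])   # the model continues the prompt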
[Figure: the pipeline above starts from raw text data (cleaned).]