The evolution of NLP models:
- Specific task helpers: n-gram models
- Task-agnostic feature learners: word2vec, context modelling
- Transfer learning for NLP: ELMo, GPT, BERT (pre-train, fine-tune)
- General language models: GPT-4, Llama (emergent abilities)
[Figure: a three-layer MLP mapping inputs \(x_1, x_2, \dots, x_n\) through parameters \(W_1, b_1\), \(W_2, b_2\), \(W_3, b_3\).]
In PyTorch, an MLP is implemented by subclassing `torch.nn.Module`:

import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, x_dim, h1_dim, h2_dim, out_dim):
        super().__init__()
        # Three affine layers: (W1, b1), (W2, b2), (W3, b3)
        self.w1 = nn.Linear(x_dim, h1_dim, bias=True)
        self.w2 = nn.Linear(h1_dim, h2_dim, bias=True)
        self.w3 = nn.Linear(h2_dim, out_dim, bias=True)

    def forward(self, x):
        out = torch.relu(self.w1(x))    # first hidden layer
        out = torch.relu(self.w2(out))  # second hidden layer
        return self.w3(out)             # output layer: raw scores, no activation
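A quick sanity check (a minimal sketch; the dimensions are arbitrary):

model = MLP(x_dim=10, h1_dim=32, h2_dim=16, out_dim=2)
x = torch.randn(4, 10)  # a batch of 4 input vectors
logits = model(x)       # shape: (4, 2)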
A small convolutional network in the same style:

import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)   # 3 input channels -> 6 feature maps, 5x5 kernels
        self.pool = nn.MaxPool2d(2, 2)    # 2x2 max pooling
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 -> 16 feature maps, 5x5 kernels
        self.fc1 = nn.Linear(16 * 5 * 5, 10)
        self.fc2 = nn.Linear(10, 4)
        self.fc3 = nn.Linear(4, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)  # flatten; 16 x 5 x 5 holds for 32x32 inputs
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)             # logits for 2 classes
        return x
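A quick check with a CIFAR-sized batch (a sketch; the flatten above assumes 32x32 images):

model = CNN()
x = torch.randn(8, 3, 32, 32)  # batch of 8 RGB images
logits = model(x)              # shape: (8, 2)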
For sequence classification, an RNN over embedded tokens:

from torch.nn.utils.rnn import pack_padded_sequence

class RNN(torch.nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_class):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,
                                      embed_dim,
                                      padding_idx=0)
        self.rnn = nn.RNN(embed_dim,
                          hidden_dim,
                          batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_class)

    def forward(self, x, length):
        x = self.embedding(x)
        # Pack the padded batch so the RNN skips padding tokens
        x = pack_padded_sequence(x,
                                 lengths=length,
                                 enforce_sorted=False,
                                 batch_first=True)
        _, hidden = self.rnn(x)     # hidden: (num_layers, batch, hidden_dim)
        return self.fc(hidden[-1])  # classify from the final hidden state
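A sketch of a forward pass (dimensions are arbitrary; length holds the true, unpadded lengths as a CPU tensor):

model = RNN(vocab_size=1000, embed_dim=64, hidden_dim=128, num_class=2)
x = torch.randint(1, 1000, (4, 12))   # batch of 4 padded sequences of length 12
length = torch.tensor([12, 9, 7, 5])  # true sequence lengths
logits = model(x, length)             # shape: (4, 2)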
And a sequence-to-sequence Transformer:

class TRANSFORMER(torch.nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim,
                 nhead=8, num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,
                                      embed_dim,
                                      padding_idx=0)
        self.transformer = nn.Transformer(d_model=embed_dim,
                                          nhead=nhead,
                                          num_encoder_layers=num_encoder_layers,
                                          num_decoder_layers=num_decoder_layers,
                                          dim_feedforward=hidden_dim,
                                          batch_first=True)
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, src, tgt):
        # nn.Transformer takes a source and a target sequence
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        out = self.transformer(src, tgt)  # (batch, tgt_len, embed_dim)
        return self.fc(out)               # per-position scores over the vocabulary
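A sketch of a forward pass (dimensions are illustrative):

model = TRANSFORMER(vocab_size=1000, embed_dim=512, hidden_dim=2048)
src = torch.randint(1, 1000, (2, 10))  # source token ids
tgt = torch.randint(1, 1000, (2, 8))   # target token ids
out = model(src, tgt)                  # shape: (2, 8, 1000)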
[Figure: Transformer encoder/decoder blocks. Encoder layer: Multi-Head Attention → Add&Norm → Feed-Forward NN → Add&Norm. Decoder layer: Masked Multi-Head Attention → Add&Norm → Multi-Head Cross-Attention → Add&Norm → Feed-Forward NN → Add&Norm.]
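The core operation inside each attention block is scaled dot-product attention. A minimal single-head sketch, without masking or the multi-head projections:

import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q, k, v):
    # q, k, v: (batch, seq_len, d_k)
    d_k = q.size(-1)
    scores = q @ k.transpose(-2, -1) / d_k ** 0.5  # query-key similarities
    weights = F.softmax(scores, dim=-1)            # each row sums to 1
    return weights @ v                             # weighted average of the values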
The Hugging Face ecosystem provides the core libraries for this workflow:

import datasets      # load and share datasets
import tokenizers    # fast tokenization: tokenizer.encode() / tokenizer.decode()
import transformers  # pretrained models
import evaluate      # evaluation metrics

[Figure: the Hugging Face Hub hosts roughly 150k datasets, 350k models, and 170k Spaces.]
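A minimal sketch of the tokenize/model round trip (the checkpoint "bert-base-uncased" is an illustrative choice):

from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

ids = tokenizer.encode("Hello world")  # text -> token ids
text = tokenizer.decode(ids)           # token ids -> text (with special tokens)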
Further libraries extend the stack for efficient training and fine-tuning:

import datasets
import tokenizers
import transformers
import evaluate
from accelerate import ...  # distributed and mixed-precision training
from optimum import ...     # hardware-specific optimizations
from peft import ...        # parameter-efficient fine-tuning
import bitsandbytes         # 8-bit optimizers and quantization
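As one example of how these pieces combine, a parameter-efficient fine-tuning sketch with peft (the target module names are an assumption that matches BERT-style models):

from peft import LoraConfig, get_peft_model

config = LoraConfig(r=8, lora_alpha=16, target_modules=["query", "value"])
peft_model = get_peft_model(model, config)  # model: the pretrained model loaded above
peft_model.print_trainable_parameters()     # only the small LoRA adapters train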
[Figure: NLP task formats.]
- Classification: input text → predict the class/sentiment
- Summarization: input text → summary
- Question answering: question + input text → answer
- Prompting: the output response is conditioned on a prompt, e.g. "predict the sentiment", "summarize", "fill in the blank", "generate a story"
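To make these task formats concrete, a sketch with the transformers pipeline API (default checkpoints are downloaded automatically; outputs are illustrative):

from transformers import pipeline

classifier = pipeline("sentiment-analysis")
classifier("I loved this movie!")       # e.g. [{'label': 'POSITIVE', 'score': 0.99}]

summarizer = pipeline("summarization")
summarizer("Some long input text ...")  # e.g. [{'summary_text': '...'}]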
[Figure: raw text data (cleaned).]