【Deep Learning】CS224N Assignment 3
Code
Uses PyTorch
Model class (ParserModel)
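The snippets below are excerpted from the assignment's parser_model.py and run.py. They assume roughly the following imports; AverageMeter and minibatches, used further down, come from the assignment's utility code and are not shown here.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm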
class ParserModel(nn.Module):
""" Feedforward neural network with an embedding layer and single hidden layer.
The ParserModel will predict which transition should be applied to a
given partial parse configuration.
PyTorch Notes:
- Note that "ParserModel" is a subclass of the "nn.Module" class. In PyTorch all neural networks
are a subclass of this "nn.Module".
- The "__init__" method is where you define all the layers and their respective parameters
(embedding layers, linear layers, dropout layers, etc.).
- "__init__" gets automatically called when you create a new instance of your class, e.g.
when you write "m = ParserModel()".
- Other methods of ParserModel can access variables that have "self." prefix. Thus,
you should add the "self." prefix layers, values, etc. that you want to utilize
in other ParserModel methods.
- For further documentation on "nn.Module" please see https://pytorch.org/docs/stable/nn.html.
"""
# Constructor: initializes the model's components and hyperparameters: hidden size, number of classes, dropout rate, the input embeddings and their dimensionality
def __init__(self, embeddings, n_features=36,
hidden_size=200, n_classes=3, dropout_prob=0.5):
""" Initialize the parser model.
@param embeddings (Tensor): word embeddings (num_words, embedding_size)
@param n_features (int): number of input features
@param hidden_size (int): number of hidden units
@param n_classes (int): number of output classes
@param dropout_prob (float): dropout probability
"""
super(ParserModel, self).__init__()
self.n_features = n_features
self.n_classes = n_classes
self.dropout_prob = dropout_prob
self.embed_size = embeddings.shape[1]
self.hidden_size = hidden_size
self.pretrained_embeddings = nn.Embedding(embeddings.shape[0], self.embed_size)
self.pretrained_embeddings.weight = nn.Parameter(torch.tensor(embeddings))
### YOUR CODE HERE (~5 Lines)
### TODO:
### 1) Construct `self.embed_to_hidden` linear layer, initializing the weight matrix
# Linear layer mapping the concatenated feature embeddings to the hidden layer
self.embed_to_hidden = nn.Linear(self.embed_size * self.n_features, self.hidden_size)
# Initialize its weight matrix with Xavier uniform initialization
nn.init.xavier_uniform_(self.embed_to_hidden.weight, gain=1)
### with the `nn.init.xavier_uniform_` function with `gain = 1` (default)
### 2) Construct `self.dropout` layer.
# Dropout applied to the hidden layer
self.dropout = nn.Dropout(p=self.dropout_prob)
### 3) Construct `self.hidden_to_logits` linear layer, initializing the weight matrix
# Linear layer mapping the hidden layer to the output logits
self.hidden_to_logits = nn.Linear(self.hidden_size, self.n_classes)
# Initialize its weight matrix with Xavier uniform initialization
nn.init.xavier_uniform_(self.hidden_to_logits.weight, gain=1)
### with the `nn.init.xavier_uniform_` function with `gain = 1` (default)
###
### Note: Here, we use Xavier Uniform Initialization for our Weight initialization.
### It has been shown empirically that this provides better initial weights
### for training networks than random uniform initialization.
### For more details checkout this great blogpost:
### http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization
### Hints:
### - After you create a linear layer you can access the weight
### matrix via:
### linear_layer.weight
###
### Please see the following docs for support:
### Linear Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
### Xavier Init: https://pytorch.org/docs/stable/nn.html#torch.nn.init.xavier_uniform_
### Dropout: https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout
### END YOUR CODE
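As a quick sanity check of __init__, you can instantiate the model with a dummy embedding matrix (the vocabulary size and embedding dimension below are made up for illustration):

import numpy as np

embeddings = np.zeros((5000, 50), dtype=np.float32)  # dummy: 5,000 words, 50-dim vectors
model = ParserModel(embeddings)                      # defaults: 36 features, 200 hidden units, 3 classes
print(model.embed_to_hidden)   # Linear(in_features=1800, out_features=200, bias=True)
print(model.hidden_to_logits)  # Linear(in_features=200, out_features=3, bias=True)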
# Maps the input tokens to embedding vectors and returns the flattened embeddings
def embedding_lookup(self, t):
""" Utilize `self.pretrained_embeddings` to map input `t` from input tokens (integers)
to embedding vectors.
PyTorch Notes:
- `self.pretrained_embeddings` is a torch.nn.Embedding object that we defined in __init__
- Here `t` is a tensor where each row represents a list of features. Each feature is represented by an integer (input token).
- In PyTorch the Embedding object, e.g. `self.pretrained_embeddings`, allows you to
go from an index to embedding. Please see the documentation (https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding)
to learn how to use `self.pretrained_embeddings` to extract the embeddings for your tensor `t`.
@param t (Tensor): input tensor of tokens (batch_size, n_features)
@return x (Tensor): tensor of embeddings for words represented in t
(batch_size, n_features * embed_size)
"""
### YOUR CODE HERE (~1-3 Lines)
### TODO:
### 1) Use `self.pretrained_embeddings` to lookup the embeddings for the input tokens in `t`.
x = self.pretrained_embeddings(t)
### 2) After you apply the embedding lookup, you will have a tensor shape (batch_size, n_features, embedding_size).
### Use the tensor `view` method to reshape the embeddings tensor to (batch_size, n_features * embedding_size)
###
x = x.view(x.size()[0], -1) # shape (batch_size, n_features * embedding_size)
### Note: In order to get batch_size, you may need use the tensor .size() function:
### https://pytorch.org/docs/stable/tensors.html#torch.Tensor.size
###
### Please see the following docs for support:
### Embedding Layer: https://pytorch.org/docs/stable/nn.html#torch.nn.Embedding
### View: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.view
### END YOUR CODE
return x
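Continuing the dummy model above, a quick check of the reshape performed in embedding_lookup:

t = torch.randint(0, 5000, (4, 36))  # a batch of 4 configurations, 36 token indices each
x = model.embedding_lookup(t)
print(x.shape)                       # torch.Size([4, 1800]), i.e. (batch_size, n_features * embed_size)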
# Forward pass
def forward(self, t):
""" Run the model forward.
Note that we will not apply the softmax function here because it is included in the loss function nn.CrossEntropyLoss
PyTorch Notes:
- Every nn.Module object (PyTorch model) has a `forward` function.
- When you apply your nn.Module to an input tensor `t` this function is applied to the tensor.
For example, if you created an instance of your ParserModel and applied it to some `t` as follows,
the `forward` function would be called on `t` and the result would be stored in the `output` variable:
model = ParserModel()
output = model(t) # this calls the forward function
- For more details checkout: https://pytorch.org/docs/stable/nn.html#torch.nn.Module.forward
@param t (Tensor): input tensor of tokens (batch_size, n_features)
@return logits (Tensor): tensor of predictions (output after applying the layers of the network)
without applying softmax (batch_size, n_classes)
"""
### YOUR CODE HERE (~3-5 lines)
### TODO:
### 1) Apply `self.embedding_lookup` to `t` to get the embeddings
### 2) Apply `embed_to_hidden` linear layer to the embeddings
### 3) Apply relu non-linearity to the output of step 2 to get the hidden units.
### 4) Apply dropout layer to the output of step 3.
### 5) Apply `hidden_to_logits` layer to the output of step 4 to get the logits.
###
### Note: We do not apply the softmax to the logits here, because
### the loss function (torch.nn.CrossEntropyLoss) applies it more efficiently.
###
### Please see the following docs for support:
### ReLU: https://pytorch.org/docs/stable/nn.html?highlight=relu#torch.nn.functional.relu
x = self.embedding_lookup(t) # look up and flatten the feature embeddings
# feed the embeddings through the network
x = self.embed_to_hidden(x)
x = F.relu(x)
x = self.dropout(x)
logits = self.hidden_to_logits(x)
### END YOUR CODE
return logits
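Calling the model object directly invokes forward. Continuing the dummy tensors above:

model.eval()            # switch off dropout so the check is deterministic
logits = model(t)       # same as model.forward(t)
print(logits.shape)     # torch.Size([4, 3]): one score per transition (SHIFT, LEFT-ARC, RIGHT-ARC)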
Training function
def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005):
""" Train the neural dependency parser.
@param parser (Parser): Neural Dependency Parser
@param train_data ():
@param dev_data ():
@param output_path (str): Path to which model weights and results are written.
@param batch_size (int): Number of examples in a single batch
@param n_epochs (int): Number of training epochs
@param lr (float): Learning rate
"""
best_dev_UAS = 0
### YOUR CODE HERE (~2-7 lines)
### TODO:
### 1) Construct Adam Optimizer in variable `optimizer`
# Construct the Adam optimizer over the parser model's parameters
optimizer = optim.Adam(parser.model.parameters(), lr=lr)
### 2) Construct the Cross Entropy Loss Function in variable `loss_func`
# Cross entropy loss (applies softmax to the logits internally)
loss_func = torch.nn.CrossEntropyLoss()
###
### Hint: Use `parser.model.parameters()` to pass optimizer
### necessary parameters to tune.
### Please see the following docs for support:
### Adam Optimizer: https://pytorch.org/docs/stable/optim.html
### Cross Entropy Loss: https://pytorch.org/docs/stable/nn.html#crossentropyloss
### END YOUR CODE
for epoch in range(n_epochs):
print("Epoch {:} out of {:}".format(epoch + 1, n_epochs))
dev_UAS = train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size)
if dev_UAS > best_dev_UAS:
best_dev_UAS = dev_UAS
print("New best dev UAS! Saving model.")
torch.save(parser.model.state_dict(), output_path)
print("")
train_for_epoch (trains for a single epoch)
epoch: one epoch means using every sample in the training set once; one epoch = one forward pass and one backward pass over all training samples
iteration: one iteration means training on batch_size samples once; one iteration = one forward pass plus one backward pass on a single minibatch
batch size: the number of samples per batch. Deep learning models are usually trained with (mini-batch) SGD, i.e. each training step draws batch_size samples from the training set.
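For example, with a hypothetical training set of 40,000 parse configurations and batch_size = 1024:

import math
n_minibatches = math.ceil(40000 / 1024)  # 40 iterations (weight updates) per epoch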
def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
""" Train the neural dependency parser for single epoch.
Note: In PyTorch we can signify train versus test and automatically have
the Dropout Layer applied and removed, accordingly, by specifying
whether we are training, `model.train()`, or evaluating, `model.eval()`
@param parser (Parser): Neural Dependency Parser
@param train_data ():
@param dev_data ():
@param optimizer (nn.Optimizer): Adam Optimizer
@param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
@param batch_size (int): batch size
@param lr (float): learning rate
@return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
"""
parser.model.train() # Places model in "train" mode, i.e. apply dropout layer
n_minibatches = math.ceil(len(train_data) / batch_size)
loss_meter = AverageMeter()
with tqdm(total=(n_minibatches)) as prog:
for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
optimizer.zero_grad() # remove any baggage in the optimizer
loss = 0. # store loss for this batch here
train_x = torch.from_numpy(train_x).long()
train_y = torch.from_numpy(train_y.nonzero()[1]).long()
### YOUR CODE HERE (~5-10 lines)
### TODO:
### 1) Run train_x forward through model to produce `logits`
logits = parser.model(train_x) # forward pass: compute the transition scores
loss = loss_func(logits, train_y) # cross entropy loss between logits and the gold transitions
loss.backward() # backpropagate
optimizer.step() # update the parameters
### 2) Use the `loss_func` parameter to apply the PyTorch CrossEntropyLoss function.
### This will take `logits` and `train_y` as inputs. It will output the CrossEntropyLoss
### between softmax(`logits`) and `train_y`. Remember that softmax(`logits`)
### are the predictions (y^ from the PDF).
### 3) Backprop losses
### 4) Take step with the optimizer
### Please see the following docs for support:
### Optimizer Step: https://pytorch.org/docs/stable/optim.html#optimizer-step
### END YOUR CODE
prog.update(1)
loss_meter.update(loss.item())
print ("Average Train Loss: {}".format(loss_meter.avg))
print("Evaluating on dev set",)
parser.model.eval() # Places model in "eval" mode, i.e. don't apply dropout layer
dev_UAS, _ = parser.parse(dev_data)
print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
return dev_UAS
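As noted in the comments above, nn.CrossEntropyLoss takes the raw logits and the gold class indices; it applies log-softmax internally and is equivalent to log_softmax followed by NLLLoss, which you can verify on dummy data:

logits = torch.randn(4, 3)            # fake transition scores for a batch of 4
targets = torch.tensor([0, 2, 1, 2])  # fake gold transition indices
ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), targets)
print(torch.allclose(ce, nll))        # True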
Graph-based approach:
Build a score matrix in which each entry scores a candidate dependency arc between a pair of words (e.g. the probability that word a depends on word b), then recover the parse tree by finding the maximum spanning tree over those arc scores.
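A minimal sketch of that idea, assuming a precomputed arc-score matrix and networkx's implementation of the maximum spanning arborescence (Chu-Liu/Edmonds); this is illustration only, not part of the assignment code:

import numpy as np
import networkx as nx

# scores[h][d]: score for an arc with head h and dependent d; index 0 is the ROOT token
scores = np.array([[0.0, 0.9, 0.1],
                   [0.0, 0.0, 0.8],
                   [0.0, 0.3, 0.0]])

G = nx.DiGraph()
n = scores.shape[0]
for h in range(n):
    for d in range(1, n):  # ROOT never receives an incoming arc
        if h != d:
            G.add_edge(h, d, weight=scores[h][d])

tree = nx.maximum_spanning_arborescence(G)  # each word gets exactly one head, no cycles
print(sorted(tree.edges()))                 # [(0, 1), (1, 2)]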