长短期记忆与门控 长短期记忆网络:: 接上期博客内容,在RNN的基础上添加LSTM和GRU,只需要把神经网络的模型修改一下即可,所以我们就可以新建一个LSTM类用于测试学习效果:
class LSTM(nn.Module):
    """Single-layer LSTM followed by a linear projection of the last time step.

    The recurrent layer is created with ``batch_first=True``; ``forward``
    keeps only the output at the final sequence position and maps it to
    ``output_size`` features.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in the same order as before (recurrent layer,
        # then projection) so seeded initialisation is unchanged.
        self.net = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through the LSTM and project the last-step output."""
        sequence_out, _state = self.net(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)
def LSTMtry(train_X, train_Y, test_X, test_Y):
    """Fit a 1-in / 3-hidden / 1-out LSTM and evaluate it.

    Training and evaluation are delegated to the ``train`` and ``test``
    helpers, which are not defined in this excerpt.
    """
    model = LSTM(1, 3, 1)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.3)

    train(model, train_X, train_Y, optimizer, criterion)
    test(model, test_X, test_Y, criterion)
1 2 3 4 5 6 7 8 9 epoch 0, loss 0.0347 epoch 10, loss 0.0293 epoch 20, loss 0.0131 epoch 30, loss 0.0052 epoch 40, loss 0.0029 epoch 50, loss 0.0022 epoch 60, loss 0.0020 epoch 70, loss 0.0020 test loss 0.0173
原理 为了保证能够拥有记忆时效,我们需要重新整理一下节点的结构,它需要一个输入(input)、一个输出(output)和一个遗忘(forget),我们把这三部分都用门控表示,得到:
class GRU(nn.Module):
    """Single-layer GRU whose last-step output feeds a linear layer.

    The recurrent layer uses ``batch_first=True``; only the output at the
    final sequence position is projected to ``output_size`` features.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Same construction order as before: recurrent layer, then projection.
        self.net = nn.GRU(
            input_size,
            hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the projection of the GRU output at the last time step."""
        hidden_seq, _final_hidden = self.net(x)
        return self.fc(hidden_seq[:, -1, :])
def GRUtry(train_X, train_Y, test_X, test_Y):
    """Fit a 1-in / 3-hidden / 1-out GRU and evaluate it.

    Training and evaluation are delegated to the ``train`` and ``test``
    helpers, which are not defined in this excerpt.
    """
    model = GRU(1, 3, 1)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.1)

    train(model, train_X, train_Y, optimizer, criterion)
    test(model, test_X, test_Y, criterion)
1 2 3 4 5 6 7 8 9 epoch 0, loss 0.0336 epoch 10, loss 0.0161 epoch 20, loss 0.0033 epoch 30, loss 0.0026 epoch 40, loss 0.0027 epoch 50, loss 0.0024 epoch 60, loss 0.0021 epoch 70, loss 0.0019 test loss 0.0137
序列到序列 按顺序讲,序列到序列其实算自然语言处理部分了,当然这里也不难,直接开始说,接下来我们以中英文对照数据 演示,首先我们需要实现两个重要结构,对于Seq2Seq而言,我们需要将内容进行编码和解码,具体原理和流程可以看:https://blog.minloha.cn/posts/131918540f8d872023021922.html
class Encoder(nn.Module):
    """Embedding + GRU encoder that processes one token per call."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Embed ``input``, reshape it to (1, 1, -1) and run one GRU step.

        Returns the GRU output together with the updated hidden state.
        """
        step_input = self.embedding(input).view(1, 1, -1)
        gru_out, next_hidden = self.gru(step_input, hidden)
        return gru_out, next_hidden

    def initHidden(self):
        """Return an all-zero (1, 1, hidden_size) state on the global ``device``."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(shape, device=device)


class Decoder(nn.Module):
    """Embedding + ReLU + GRU decoder emitting log-probabilities per step."""

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step.

        Returns the log-softmax over the output layer (applied to the first
        row of the GRU output) and the updated hidden state.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        activated = F.relu(embedded)
        gru_out, next_hidden = self.gru(activated, hidden)
        scores = self.out(gru_out[0])
        return self.softmax(scores), next_hidden

    def initHidden(self):
        """Return an all-zero (1, 1, hidden_size) state on the global ``device``."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(shape, device=device)
from io import open
import re
import unicodedata

import numpy as np

SOS_token = 0
EOS_token = 1


class ToLang:
    """Vocabulary of one language: word/index maps plus word counts."""

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # indexes 0 and 1 are taken by SOS and EOS

    def addSentence(self, sentence):
        """Register every space-separated word of ``sentence``."""
        for word in sentence.split(" "):
            self.addWord(word)

    def addWord(self, word):
        """Assign the next free index to a new word, or bump its count."""
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1


def unicodeToAscii(s):
    """Decompose ``s`` (NFD) and drop all combining marks (category ``Mn``)."""
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )


def normalizeString(s):
    """Lower-case and strip ``s``, remove accents, and put a space before . ! ?"""
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    return s


def readLangs(lang1, lang2, reverse=False):
    """Read the tab-separated corpus and build two empty vocabularies.

    :param lang1: name of the first language
    :param lang2: name of the second language
    :param reverse: when true, swap the two columns and the vocabulary roles
    :return: ``(input_lang, output_lang, pairs)``
    """
    # Use a context manager so the file handle is closed deterministically
    # (the previous bare open(...).read() left it to the garbage collector).
    with open(r"D:\python\RL\translate\data.txt", encoding="utf-8") as corpus:
        lines = corpus.read().strip().split("\n")

    pairs = [[normalizeString(s) for s in l.split("\t")] for l in lines]
    # Drop the third tab-separated column; only the first two are kept.
    pairs = np.delete(pairs, 2, axis=1)

    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = ToLang(lang2)
        output_lang = ToLang(lang1)
    else:
        input_lang = ToLang(lang1)
        output_lang = ToLang(lang2)
    return input_lang, output_lang, pairs


lang1 = "cmn"
lang2 = "fra"
# NOTE(review): these three names are rebound by the prepareData(...) call at
# the bottom of the module, so this first read looks redundant - confirm
# before removing.
input_lang, output_lang, pairs = readLangs(lang1, lang2)

MAX_LENGTH = 10

# NOTE(review): normalizeString does not strip apostrophes, so the
# apostrophe-less forms ("i m ", "he s ", ...) presumably never match text
# such as "i'm" - verify against the corpus.
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re ",
)


def filterPair(p):
    """Keep a pair when both sides are short and side 1 starts with a prefix."""
    return (
        len(p[0].split(' ')) < MAX_LENGTH
        and len(p[1].split(' ')) < MAX_LENGTH
        and p[1].startswith(eng_prefixes)
    )


def filterPairs(pairs):
    """Return the pairs accepted by :func:`filterPair`."""
    return [pair for pair in pairs if filterPair(pair)]


def prepareData(lang1, lang2, reverse=False):
    """Read the corpus, filter it and fill both vocabularies.

    :return: ``(input_lang, output_lang, pairs)`` with the filtered pairs
    """
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    return input_lang, output_lang, pairs


input_lang, output_lang, pairs = prepareData('eng', 'cmn', True)
import dataTreat as dt


def indexesFromSentence(lang, sentence):
    """Look up the vocabulary index of every space-separated word."""
    vocabulary = lang.word2index
    return [vocabulary[token] for token in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    """Return the sentence's word indexes plus EOS as a long tensor of shape (-1, 1)."""
    token_ids = indexesFromSentence(lang, sentence)
    token_ids.append(dt.EOS_token)
    flat = torch.tensor(token_ids, dtype=torch.long, device=device)
    return flat.view(-1, 1)


def tensorsFromPair(pair):
    """Convert a (source, target) sentence pair into a pair of index tensors."""
    source_tensor = tensorFromSentence(dt.input_lang, pair[0])
    target_tensor = tensorFromSentence(dt.output_lang, pair[1])
    return source_tensor, target_tensor
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=dt.MAX_LENGTH):
    """Run one optimisation step on a single sentence pair.

    :param input_tensor: source token indexes, iterated along dimension 0
    :param target_tensor: target token indexes, iterated along dimension 0
    :param encoder: encoder module (provides ``initHidden`` and ``hidden_size``)
    :param decoder: decoder module called as ``decoder(input, hidden)``
    :param encoder_optimizer: optimizer for the encoder parameters
    :param decoder_optimizer: optimizer for the decoder parameters
    :param criterion: loss applied to each decoder output and target token
    :param max_length: number of rows of the encoder-output buffer
    :return: the summed loss divided by the target length
    """
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Filled for parity with the attention variant; this decoder does not
    # consume it.
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[dt.SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    if use_teacher_forcing:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            # Teacher forcing: the ground-truth token becomes the next input.
            # Without this the decoder was fed SOS at every step.
            decoder_input = target_tensor[di]
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            # Feed the model's own prediction back, detached from the graph.
            decoder_input = topi.squeeze().detach()
            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == dt.EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``."""
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    """Return the time elapsed since ``since`` as text with a trailing space.

    :param since: start timestamp as returned by ``time.time()``
    :param percent: kept for call compatibility; it is not used, so a value
        of 0 no longer raises ``ZeroDivisionError``
    """
    elapsed = time.time() - since
    return '%s ' % asMinutes(elapsed)


def trainIters(encoder, decoder, n_iters, print_every=100, plot_every=100,
               learning_rate=0.01):
    """Train the encoder/decoder on randomly drawn sentence pairs.

    :param encoder: encoder module
    :param decoder: decoder module
    :param n_iters: number of training iterations
    :param print_every: print progress every this many iterations
    :param plot_every: record a point for the plot every this many iterations
    :param learning_rate: learning rate for both Adam optimizers
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(dt.pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    # ``step`` replaces the former loop variable ``iter``, which shadowed the
    # builtin of the same name.
    for step, (input_tensor, target_tensor) in enumerate(training_pairs, start=1):
        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            # The value printed after "Acc" is the average loss of the window.
            print('Time: %s Iterator: %d%% Acc: %.4f' % (
                timeSince(start, step / n_iters),
                step / n_iters * 100,
                print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
def evaluate(encoder, decoder, sentence, max_length=dt.MAX_LENGTH):
    """Greedily decode ``sentence`` with the plain (non-attention) decoder.

    Returns the decoded words and the first ``di + 1`` rows of an all-zero
    (max_length, max_length) tensor; this decoder produces no attention
    weights, so the tensor is never filled.
    """
    with torch.no_grad():
        source = tensorFromSentence(dt.input_lang, sentence)
        source_len = source.size(0)

        state = encoder.initHidden()
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        for ei in range(source_len):
            step_out, state = encoder(source[ei], state)
            encoder_outputs[ei] += step_out[0, 0]

        # Decoding starts from SOS with the encoder's final hidden state.
        decoder_input = torch.tensor([[dt.SOS_token]], device=device)
        decoder_hidden = state

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            _best_value, best_index = decoder_output.data.topk(1)
            if best_index.item() == dt.EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(dt.output_lang.index2word[best_index.item()])
            decoder_input = best_index.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]
def showPlot(points):
    """Plot ``points`` with a major y-axis tick every 0.2.

    Only one figure is created: the former extra ``plt.figure()`` call
    opened a second, empty figure before ``plt.subplots()`` made its own.
    """
    fig, ax = plt.subplots()
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)


decoder_hidden = [10, 5, 10]

if __name__ == "__main__":
    hidden_size = 256
    encoder = Encoder(dt.input_lang.n_words, hidden_size).to(device)
    decoder = Decoder(hidden_size, dt.output_lang.n_words).to(device)
    trainIters(encoder, decoder, 20000, print_every=500)
    # Decode one sample sentence with the trained model and print the words.
    back, att = evaluate(encoder, decoder, "我是天才")
    print(back)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 Read 29668 sentence pairs Trimmed to 672 sentence pairs Time: 0m 4s Iterator: 2% Acc: 4.8165 Time: 0m 8s Iterator: 5% Acc: 4.1715 Time: 0m 12s Iterator: 7% Acc: 4.0555 Time: 0m 16s Iterator: 10% Acc: 4.1072 Time: 0m 20s Iterator: 12% Acc: 4.1130 Time: 0m 24s Iterator: 15% Acc: 4.3046 Time: 0m 28s Iterator: 17% Acc: 4.0388 Time: 0m 32s Iterator: 20% Acc: 4.1469 Time: 0m 36s Iterator: 22% Acc: 4.2702 Time: 0m 40s Iterator: 25% Acc: 4.1994 Time: 0m 44s Iterator: 27% Acc: 4.1615 Time: 0m 48s Iterator: 30% Acc: 3.9064 Time: 0m 52s Iterator: 32% Acc: 4.2069 Time: 0m 56s Iterator: 35% Acc: 4.0334 Time: 1m 0s Iterator: 37% Acc: 4.2500 Time: 1m 4s Iterator: 40% Acc: 4.2576 Time: 1m 7s Iterator: 42% Acc: 4.0754 Time: 1m 11s Iterator: 45% Acc: 3.9563 Time: 1m 15s Iterator: 47% Acc: 3.6001 Time: 1m 19s Iterator: 50% Acc: 3.7663 Time: 1m 23s Iterator: 52% Acc: 3.9296 Time: 1m 27s Iterator: 55% Acc: 3.5670 Time: 1m 30s Iterator: 57% Acc: 3.9190 Time: 1m 34s Iterator: 60% Acc: 3.8559 Time: 1m 38s Iterator: 62% Acc: 4.0905 Time: 1m 42s Iterator: 65% Acc: 4.1999 Time: 1m 46s Iterator: 67% Acc: 4.1665 Time: 1m 49s Iterator: 70% Acc: 3.9940 Time: 1m 53s Iterator: 72% Acc: 3.7515 Time: 1m 57s Iterator: 75% Acc: 3.7801 Time: 2m 1s Iterator: 77% Acc: 4.1297 Time: 2m 5s Iterator: 80% Acc: 4.2860 Time: 2m 9s Iterator: 82% Acc: 4.0266 Time: 2m 13s Iterator: 85% Acc: 4.0970 Time: 2m 16s Iterator: 87% Acc: 4.3627 Time: 2m 20s Iterator: 90% Acc: 4.5892 Time: 2m 24s Iterator: 92% Acc: 4.5988 Time: 2m 28s Iterator: 95% Acc: 4.4202 Time: 2m 32s Iterator: 97% Acc: 4.2970 Time: 2m 36s Iterator: 100% Acc: 4.2186 ['she' , 'is' , 'a' , 'inches' , '.' , '.' , '.' , '.' , '.' , '.' ]
class AttnDecoder(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    :param hidden_size: size of the embeddings and of the GRU state
    :param output_size: size of the output vocabulary
    :param dropout_p: dropout probability applied to the embedded input
    :param max_length: number of encoder positions the attention layer scores
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1,
                 max_length=MAX_LENGTH):
        super(AttnDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        # Built once: the layer used to be constructed twice and the first
        # instance was immediately overwritten.
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        :return: ``(log-probabilities, new hidden state, attention weights)``
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Attention weights come from the embedded input and current hidden state.
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        # Weighted sum of the encoder outputs.
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        """Return an all-zero (1, 1, hidden_size) state on the global ``device``."""
        return torch.zeros(1, 1, self.hidden_size, device=device)
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=dt.MAX_LENGTH):
    """Run one optimisation step on a single pair with the attention decoder.

    :param input_tensor: source token indexes, iterated along dimension 0
    :param target_tensor: target token indexes, iterated along dimension 0
    :param encoder: encoder module (provides ``initHidden`` and ``hidden_size``)
    :param decoder: decoder called as ``decoder(input, hidden, encoder_outputs)``
    :param encoder_optimizer: optimizer for the encoder parameters
    :param decoder_optimizer: optimizer for the decoder parameters
    :param criterion: loss applied to each decoder output and target token
    :param max_length: number of rows of the encoder-output buffer
    :return: the summed loss divided by the target length
    """
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[dt.SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    if use_teacher_forcing:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            loss += criterion(decoder_output, target_tensor[di])
            # Teacher forcing: the ground-truth token becomes the next input.
            # Without this the decoder was fed SOS at every step.
            decoder_input = target_tensor[di]
    else:
        for di in range(target_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            # Feed the model's own prediction back, detached from the graph.
            decoder_input = topi.squeeze().detach()
            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == dt.EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length


def trainIters(encoder, decoder, n_iters, print_every=100, plot_every=100,
               learning_rate=0.01):
    """Train the encoder/decoder on randomly drawn sentence pairs using SGD.

    :param encoder: encoder module
    :param decoder: decoder module
    :param n_iters: number of training iterations
    :param print_every: print progress every this many iterations
    :param plot_every: record a point for the plot every this many iterations
    :param learning_rate: learning rate for both SGD optimizers
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0
    plot_loss_total = 0

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(dt.pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    # ``step`` replaces the former loop variable ``iter``, which shadowed the
    # builtin of the same name.
    for step, (input_tensor, target_tensor) in enumerate(training_pairs, start=1):
        loss = train(input_tensor, target_tensor, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            # The value printed after "Acc" is the average loss of the window.
            print('Time: %s Iterator: %d%% Acc: %.4f' % (
                timeSince(start, step / n_iters),
                step / n_iters * 100,
                print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)
def evaluate(encoder, decoder, sentence, max_length=dt.MAX_LENGTH):
    """Greedily decode ``sentence`` with the attention decoder.

    :return: the decoded words and the attention rows recorded for the
        decoding steps that were run
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(dt.input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        # Decoding starts from SOS with the encoder's final hidden state.
        decoder_input = torch.tensor([[dt.SOS_token]], device=device)
        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == dt.EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(dt.output_lang.index2word[topi.item()])

            decoder_input = topi.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]


def evaluateRandomly(encoder, decoder, n=10):
    """Print source, reference and model output for ``n`` random pairs."""
    for i in range(n):
        pair = random.choice(dt.pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


def showAttention(input_sentence, output_words, attentions):
    """Show the attention matrix as an image with the input words on the x axis.

    :param input_sentence: space-separated source sentence used for x labels
    :param output_words: decoded words (accepted for call compatibility; not
        used for labelling)
    :param attentions: tensor of attention weights, one row per decoding step
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)

    # Axes has no ``set_ticks`` method (the old call raised AttributeError);
    # place one labelled tick per input word instead. No MultipleLocator is
    # installed on the x axis afterwards, since it would replace these fixed
    # tick positions and shift the labels.
    words = input_sentence.split(' ')
    ax.set_xticks(list(range(len(words))))
    ax.set_xticklabels(words)

    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


def evaluateAndShowAttention(input_sentence):
    """Decode ``input_sentence`` with the global encoder/decoder and plot attention."""
    output_words, attentions = evaluate(encoder, decoder, input_sentence)
    print('input =', input_sentence)
    print('output =', ' '.join(output_words))
    showAttention(input_sentence, output_words, attentions)
if __name__ == "__main__":
    # Build the encoder and the attention decoder, train them, then decode
    # one sample sentence and display its attention map.
    hidden_size = 256
    encoder = Encoder(
        input_size=dt.input_lang.n_words,
        hidden_size=hidden_size,
    ).to(device)
    decoder = AttnDecoder(
        hidden_size=hidden_size,
        output_size=dt.output_lang.n_words,
    ).to(device)

    trainIters(encoder, decoder, n_iters=20000, print_every=500)

    evaluateAndShowAttention("我最帅!")
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 Read 29668 sentence pairs Trimmed to 672 sentence pairs Time: 0m 6s Iterator: 2% Acc: 3.7084 Time: 0m 11s Iterator: 5% Acc: 3.5018 Time: 0m 16s Iterator: 7% Acc: 3.3890 Time: 0m 21s Iterator: 10% Acc: 3.3418 Time: 0m 26s Iterator: 12% Acc: 3.2065 Time: 0m 31s Iterator: 15% Acc: 3.1283 Time: 0m 36s Iterator: 17% Acc: 3.0031 Time: 0m 42s Iterator: 20% Acc: 2.8943 Time: 0m 47s Iterator: 22% Acc: 2.8353 Time: 0m 53s Iterator: 25% Acc: 2.7512 Time: 0m 58s Iterator: 27% Acc: 2.6268 Time: 1m 3s Iterator: 30% Acc: 2.4636 Time: 1m 8s Iterator: 32% Acc: 2.3403 Time: 1m 13s Iterator: 35% Acc: 2.2529 Time: 1m 18s Iterator: 37% Acc: 2.0536 Time: 1m 23s Iterator: 40% Acc: 1.9667 Time: 1m 28s Iterator: 42% Acc: 1.7806 Time: 1m 33s Iterator: 45% Acc: 1.6436 Time: 1m 39s Iterator: 47% Acc: 1.5148 Time: 1m 44s Iterator: 50% Acc: 1.3833 Time: 1m 50s Iterator: 52% Acc: 1.3173 Time: 1m 56s Iterator: 55% Acc: 1.1492 Time: 2m 2s Iterator: 57% Acc: 1.0208 Time: 2m 7s Iterator: 60% Acc: 0.8369 Time: 2m 12s Iterator: 62% Acc: 0.7971 Time: 2m 17s Iterator: 65% Acc: 0.6642 Time: 2m 22s Iterator: 67% Acc: 0.6291 Time: 2m 28s Iterator: 70% Acc: 0.5314 Time: 2m 33s Iterator: 72% Acc: 0.4829 Time: 2m 38s Iterator: 75% Acc: 0.4303 Time: 2m 43s Iterator: 77% Acc: 0.3285 Time: 2m 48s Iterator: 80% Acc: 0.2829 Time: 2m 53s Iterator: 82% Acc: 0.2468 Time: 2m 59s Iterator: 85% Acc: 0.2233 Time: 3m 4s Iterator: 87% Acc: 0.1972 Time: 3m 9s Iterator: 90% Acc: 0.1789 Time: 3m 14s Iterator: 92% Acc: 0.1774 Time: 3m 20s Iterator: 95% Acc: 0.1612 Time: 3m 25s Iterator: 97% Acc: 0.1309 Time: 3m 30s Iterator: 100% Acc: 0.1419 ['i' , 'am' , 'cool' , '' , '.' , '.' , '.' , '.' , '.' , '.' ]
总结 本期博客介绍了循环神经网络系列内容,当然Seq2Seq内容过长,学起来难度不小,所以需要反复复习才可以精通,基础内容也不能落下,共勉!