import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import losses, Sequential, optimizers, layers, datasets

batchsz = 128          # batch size
total_words = 10000    # vocabulary size N_vocab
max_review_len = 80    # maximum review length s; longer reviews are truncated, shorter ones are padded
embedding_len = 100    # word-embedding feature length

# load IMDB, keeping only the top total_words most frequent words
(x_train, y_train), (x_test, y_test) = datasets.imdb.load_data(num_words=total_words)
# print(x_train.shape, len(x_train[0]), y_train.shape)
# print(x_test.shape, len(x_test[0]), y_test.shape)

# pad/truncate every review to exactly max_review_len tokens: list of lists => [b, 80]
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# build the tf.data pipelines; drop_remainder keeps every batch at exactly batchsz samples,
# which the fixed-size initial states below rely on
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train))
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
train_db = train_db.shuffle(1000).batch(batchsz, drop_remainder=True)
test_db = test_db.batch(batchsz, drop_remainder=True)

# dataset statistics
# print('x_train shape: ', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
# print('x_test shape: ', x_test.shape)


class MyLMST(tf.keras.Model):
    def __init__(self, units):
        super(MyLMST, self).__init__()
        # initial hidden states for the two recurrent cells, one per layer
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        # word-embedding layer: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # two GRU cells; dropout helps against overfitting
        self.run_cell0 = layers.GRUCell(units, dropout=0.5)
        self.run_cell1 = layers.GRUCell(units, dropout=0.5)
        # classification head: [b, 64] => [b, 1]
        self.outlayer = Sequential([layers.Dense(units),
                                    layers.Dropout(rate=0.5),
                                    layers.ReLU(),
                                    layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs
        # look up word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # unroll the two GRU cells over the time axis: [b, 80, 100] => [b, 64]
        state0 = self.state0
        state1 = self.state1
        for word in tf.unstack(x, axis=1):
            out0, state0 = self.run_cell0(word, state0, training=training)
            out1, state1 = self.run_cell1(out0, state1, training=training)
        # the last output of the top layer feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out1, training=training)
        # sigmoid activation gives p(y is positive | x)
        prob = tf.sigmoid(x)
        return prob


def main():
    units = 64
    epochs = 6
    learning_rate = 0.001
    model = MyLMST(units)
    # compile; experimental_run_tf_function is only accepted by older TensorFlow 2.x
    # releases, drop it if compile() rejects the keyword
    model.compile(optimizer=optimizers.Adam(learning_rate),
                  loss=losses.BinaryCrossentropy(),
                  metrics=['accuracy'],
                  experimental_run_tf_function=False)
    # train and validate
    model.fit(train_db, epochs=epochs, validation_data=test_db)
    # test
    model.evaluate(test_db)


if __name__ == '__main__':
    main()
Epoch 1/6
195/195 [==============================] - 272s 1s/step - loss: 0.5361 - accuracy: 0.6149 - val_loss: 0.3710 - val_accuracy: 0.8345
Epoch 2/6
195/195 [==============================] - 260s 1s/step - loss: 0.3333 - accuracy: 0.8485 - val_loss: 0.3599 - val_accuracy: 0.8411
Epoch 3/6
195/195 [==============================] - 262s 1s/step - loss: 0.2674 - accuracy: 0.8832 - val_loss: 0.4213 - val_accuracy: 0.8343
Epoch 4/6
195/195 [==============================] - 265s 1s/step - loss: 0.2288 - accuracy: 0.9078 - val_loss: 0.4900 - val_accuracy: 0.8303
Epoch 5/6
195/195 [==============================] - 272s 1s/step - loss: 0.1968 - accuracy: 0.9234 - val_loss: 0.4947 - val_accuracy: 0.8196
Epoch 6/6
195/195 [==============================] - 268s 1s/step - loss: 0.1711 - accuracy: 0.9320 - val_loss: 0.5421 - val_accuracy: 0.8242
195/195 [==============================] - 73s 373ms/step - loss: 0.5421 - accuracy: 0.8242
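For comparison, the same two-layer GRU classifier can be written more compactly with the built-in layers.GRU layer, which handles the time-axis unrolling and the per-batch initial states internally, so it needs neither the fixed [batchsz, units] states nor drop_remainder=True. This is a minimal sketch reusing the hyperparameters above, not the script that produced the log:

import tensorflow as tf
from tensorflow.keras import Sequential, layers, losses, optimizers

units, total_words, embedding_len, max_review_len = 64, 10000, 100, 80

# stacked-GRU sketch of the same architecture; the first GRU returns the full
# sequence for the second GRU, which returns only its last output
gru_model = Sequential([
    layers.Embedding(total_words, embedding_len, input_length=max_review_len),
    layers.GRU(units, dropout=0.5, return_sequences=True),
    layers.GRU(units, dropout=0.5),
    layers.Dense(units, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),   # p(y is positive | x)
])
gru_model.compile(optimizer=optimizers.Adam(0.001),
                  loss=losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
# gru_model.fit(train_db, epochs=6, validation_data=test_db)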
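A quick inference sketch for scoring a raw review follows. The word_index lookup and the +3 index offset (0 = padding, 1 = start, 2 = unknown) mirror the defaults of datasets.imdb.load_data; the tokenization is deliberately naive, and the example assumes the gru_model from the previous sketch, since MyLMST's fixed initial states expect full batches of batchsz samples:

from tensorflow.keras import datasets, preprocessing

word_index = datasets.imdb.get_word_index()   # word -> frequency rank (1-based)

def encode_review(text, total_words=10000, max_review_len=80):
    # mirror the default load_data encoding: start token 1, each rank shifted by 3,
    # out-of-vocabulary and too-rare words mapped to 2
    ids = [1]
    for w in text.lower().split():
        rank = word_index.get(w)
        ids.append(rank + 3 if rank is not None and rank + 3 < total_words else 2)
    return preprocessing.sequence.pad_sequences([ids], maxlen=max_review_len)

x = encode_review("this movie was surprisingly good and the acting was brilliant")
# prob = gru_model(x, training=False)   # predicted probability that the review is positive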