import tensorflow as tf
1. IMDB Data_Set Load & Review
1-1. Load IMDB Data_Set
from tensorflow.keras.datasets import imdb
# Load the IMDB review data set as (data, labels) pairs for training and testing,
# with the vocabulary limited through num_words = 10000.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)
# Largest word index that occurs anywhere in the training reviews.
max(max(W) for W in train_data)
9999
1-2. Visualization & Frequency(Optional)
import matplotlib.pyplot as plt
# Measure every training review once, then reuse the lengths for the
# printed statistics (maximum and mean review length) and the histogram.
review_lengths = [len(review) for review in train_data]
print('리뷰 최대 길이 :', max(review_lengths))
print('리뷰 평균 길이 :', sum(review_lengths)/len(train_data))

# Distribution of review lengths over the training set.
plt.figure(figsize = (9, 6))
plt.hist(review_lengths, bins = 50)
plt.xlabel('Length of train_data')
plt.ylabel('Number of train_data')
plt.show()
import numpy as np
# Count how many times each distinct label value occurs in the training labels.
unique_elements, counts_elements = np.unique(train_labels, return_counts = True)
print('Label 빈도수:')
# Stack into a 2-row array: first row the label values, second row their counts.
print(np.asarray((unique_elements, counts_elements)))
1-3. Data Structure Review(Optional)
# Total number of training reviews
print(len(train_data))
# Details of the first training review:
# its length, its first ten word indices, how often index 4 occurs, and its label
print(len(train_data[0]))
print(train_data[0][0:10])
print(train_data[0].count(4))
print(train_labels[0])
1-4. Vector to Word(Optional)
# get_word_index() returns a dictionary that maps each word to its index.
word_index = imdb.get_word_index()
print(word_index)
# Indices 0, 1 and 2 stand for 'padding', 'start of document' and 'not in dictionary'.
# Swap keys and values so that an index looks up its word.
reverse_word_index = {index: word for word, index in word_index.items()}
print(reverse_word_index)
# Decode review 0 (label 1: positive).
# Indices are shifted by 3 because 0, 1 and 2 are reserved (see the note on the
# word index above); anything without a dictionary entry is shown as '?'.
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
print(decoded_review)
print(train_labels[0])
# Decode review 1 (label 0: negative).
# Indices are shifted by 3 because 0, 1 and 2 are reserved for padding,
# start-of-document and unknown words; anything without a dictionary entry
# is shown as '?'.
# The result must be bound to `decoded_review` (the original assigned to the
# misspelled `ecoded_review`, so the print below repeated review 0).
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[1]])
print(decoded_review)
print(train_labels[1])
2. Tensor Transformation
2-1. X_train & X_test : (25000, 10000)
import numpy as np
def vectorize_sequences(sequences, dimension: int = 10000) -> np.ndarray:
    """Multi-hot encode integer sequences into a 2-D float array.

    Args:
        sequences: A sized iterable of integer-index sequences (for example
            a list of lists of word indices).
        dimension: Width of each output row; every index in ``sequences``
            must be a valid column index for this width.

    Returns:
        An array of shape ``(len(sequences), dimension)`` where
        ``results[i, j]`` is 1.0 if index ``j`` occurs in ``sequences[i]``
        and 0.0 otherwise. Repeated indices still produce a single 1.0.
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets every listed column of row i in one step.
        results[i, sequence] = 1.0
    return results
# Small demonstration of the indexing used by vectorize_sequences:
# row i of a 5 x 10 zero matrix gets a 1.0 in the column given by v[i].
r = np.zeros((5, 10))
v = [1, 3, 5, 7, 9]
# Use a separate loop variable so the list `v` is not rebound while iterating.
for i, col in enumerate(v):
    r[i, col] = 1.0
print(r)
# Encode the train and test reviews with vectorize_sequences
# (default dimension = 10000, so each review becomes a 10000-wide row).
X_train = vectorize_sequences(train_data)
X_test = vectorize_sequences(test_data)
# Notebook-style expression: displays both array shapes.
X_train.shape, X_test.shape
((25000, 10000), (25000, 10000))
# Peek at the first 21 and the last 21 entries (positions 9979 to 9999)
# of the first encoded training review and the first encoded test review.
print(X_train[0][:21])
print(X_train[0][9979:])
print(X_test[0][:21])
print(X_test[0][9979:])
2-2. y_train & y_test
# Convert the labels to float arrays.
y_train = np.asarray(train_labels).astype(float)
y_test = np.asarray(test_labels).astype(float)
# Show the first 21 labels of each set.
print(y_train[:21])
print(y_test[:21])
2-3. Train vs. Validation Split
# Hold out the first 10000 training samples for validation and train on the rest.
# The split is positional (plain slicing); no shuffling is done here.
X_valid = X_train[:10000]
partial_X_train = X_train[10000:]
y_valid = y_train[:10000]
partial_y_train = y_train[10000:]
# Notebook-style expression: displays the shapes of the four resulting arrays.
partial_X_train.shape, partial_y_train.shape, X_valid.shape, y_valid.shape
((15000, 10000), (15000,), (10000, 10000), (10000,))
3. IMDB Keras Modeling
3-1. Model Define
from tensorflow.keras import models
from tensorflow.keras import layers
# Build a Sequential classifier: two 16-unit ReLU layers followed by a
# single sigmoid output unit, taking 10000-wide input vectors.
# NOTE(review): this rebinds the name `imdb`, which was previously the
# tensorflow.keras.datasets.imdb module; after this line, calls such as
# imdb.load_data() or imdb.get_word_index() no longer reach the dataset module.
imdb = models.Sequential()
imdb.add(layers.Dense(16, activation = 'relu', input_shape = (10000,)))
imdb.add(layers.Dense(16, activation = 'relu'))
imdb.add(layers.Dense(1, activation = 'sigmoid'))
imdb.summary()
3-2. Model Compile
# Configure training: binary cross-entropy loss, the RMSprop optimizer,
# and accuracy as the reported metric.
imdb.compile(loss = 'binary_crossentropy',
optimizer = 'rmsprop',
metrics = ['accuracy'])
3-3. Model Fit
%%time
# Train for 50 epochs in batches of 512 on the partial training set, evaluating on
# the held-out validation set; the returned history object is kept in Hist_imdb.
# NOTE(review): no early stopping is configured here - compare the training and
# validation curves plotted afterwards to judge overfitting.
Hist_imdb = imdb.fit(partial_X_train, partial_y_train,
epochs = 50,
batch_size = 512,
validation_data = (X_valid, y_valid))
3-4. 학습 결과 시각화
import matplotlib.pyplot as plt
# Plot training and validation loss against the epoch number (epochs start at 1).
history = Hist_imdb.history
epochs = range(1, len(history['loss']) + 1)

plt.figure(figsize = (9, 6))
# Training curve first, then validation, matching the legend order below.
for curve in ('loss', 'val_loss'):
    plt.plot(epochs, history[curve])
plt.title('Training & Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(['Training Loss', 'Validation Loss'])
plt.grid()
plt.show()
import matplotlib.pyplot as plt
# Plot training and validation accuracy against the epoch number (epochs start at 1).
history = Hist_imdb.history
epochs = range(1, len(history['accuracy']) + 1)

plt.figure(figsize = (9, 6))
# Training curve first, then validation, matching the legend order below.
for curve in ('accuracy', 'val_accuracy'):
    plt.plot(epochs, history[curve])
plt.title('Training & Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['Training Accuracy', 'Validation Accuracy'])
plt.grid()
plt.show()
3-5. Model Evaluate
# Evaluate the trained model on the test set; with the single 'accuracy' metric
# configured at compile time, evaluate() yields the loss and the accuracy.
loss, accuracy = imdb.evaluate(X_test, y_test)
# Report both values with five decimal places.
print('Loss = {:.5f}'.format(loss))
print('Accuracy = {:.5f}'.format(accuracy))
3-6. Model Predict
# Round the sigmoid outputs for the test set to the nearest integer to get 0/1 class predictions.
np.round(imdb.predict(X_test))
DNN(Deep Neural Network)-MNIST _Categorical Classification (0) | 2022.06.20 |
---|---|
DNN(Deep Neural Network)-MNIST _Categorical Classification (0) | 2022.06.17 |
TensorFlow (0) | 2022.06.13 |
Error Backpropagation (0) | 2022.06.13 |
Artificial Neural Network(ANN)-Softmax Activation (1) | 2022.06.13 |