import re
import string
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization  # moved out of layers.experimental.preprocessing in TF >= 2.6
from tensorflow.keras.datasets import imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000)
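# Quick sanity check (illustrative): with num_words=10000, each review is a
# list of word indices, none of which exceeds 9999.
print(len(train_data), len(test_data))  # 25000 25000
print(max(max(sequence) for sequence in train_data))  # 9999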
test_sentence = "Everyone except my father loves the released film."  # used later for the out-of-vocabulary count
# Decode a review back to English text
word_index = imdb.get_word_index()
reverse_word_index = dict(
    [(value, key) for (key, value) in word_index.items()])
decoded_review = " ".join(
    [reverse_word_index.get(i - 3, "?") for i in train_data[0]])  # indices 0-2 are reserved for padding/start/unknown
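# Illustrative: peek at the decoded review and its label (0 = negative, 1 = positive).
print(decoded_review[:80], "...")
print("label:", train_labels[0])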
def standardize(text):
    """Lowercase the text and strip punctuation."""
    text = text.lower()
    return "".join(char for char in text if char not in string.punctuation)

def sentence_to_words(text):
    """Standardize the text, then split it on whitespace into word tokens."""
    return standardize(text).split()
words = sentence_to_words(decoded_review)
print(words[:10])  # first few tokens of the decoded review
def custom_standardization_fn(string_tensor):
    """Lowercase and strip punctuation with TensorFlow string ops,
    so the step can run inside the TextVectorization layer."""
    lowercase_string = tf.strings.lower(string_tensor)
    return tf.strings.regex_replace(
        lowercase_string, f"[{re.escape(string.punctuation)}]", "")

def custom_split_fn(string_tensor):
    """Split on whitespace."""
    return tf.strings.split(string_tensor)
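# Illustrative: the TF-native standardizer mirrors the pure-Python standardize()
# defined above.
print(custom_standardization_fn(tf.constant("Hello, World!")))  # -> tf.Tensor(b'hello world', shape=(), dtype=string)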
text_vectorization = TextVectorization(
    output_mode="int",
    standardize=custom_standardization_fn,
    split=custom_split_fn,
)
text_vectorization.adapt(words)  # build the vocabulary from the decoded review's words
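# Illustrative: inspect the learned vocabulary. TextVectorization reserves
# index 0 for padding and index 1 for out-of-vocabulary ([UNK]) tokens.
print(text_vectorization.get_vocabulary()[:10])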
encoded_sentence = text_vectorization(test_sentence)
# Count how many words of the test sentence are out-of-vocabulary:
# unknown tokens are mapped to index 1 ([UNK]).
cnt = 0
for word_idx in encoded_sentence:
    if word_idx == 1:
        cnt += 1
print(cnt)
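# Equivalent vectorized count (a sketch using standard TensorFlow ops):
oov_count = int(tf.reduce_sum(tf.cast(encoded_sentence == 1, tf.int32)))
print(oov_count)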
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences as vectors of 0s and 1s."""
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        for j in sequence:
            results[i, j] = 1.  # set the position of every word index present to 1
    return results
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)
y_train = np.asarray(train_labels).astype("float32")
y_test = np.asarray(test_labels).astype("float32")
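# Illustrative sanity check of the vectorized data.
print(x_train.shape)  # (25000, 10000)
print(y_train[:5])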
from tensorflow import keras
from tensorflow.keras import layers
model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")  # single sigmoid unit: P(review is positive)
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",  # the natural loss for a binary, probability-output model
              metrics=["accuracy"])
x_val = x_train[:10000]              # hold out the first 10,000 samples for validation
partial_x_train = x_train[10000:]
y_val = y_train[:10000]
partial_y_train = y_train[10000:]
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))
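# Illustrative: fit() records per-epoch metrics in history.history. With 20
# epochs this model typically overfits, so the best validation accuracy tends
# to appear within the first few epochs.
val_acc = history.history["val_accuracy"]
print("best val_accuracy:", max(val_acc), "at epoch", val_acc.index(max(val_acc)) + 1)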
inputs = keras.Input(shape=(28, 28, 1))  # 28x28 grayscale input (e.g. MNIST)
x = layers.Conv2D(filters=32, kernel_size=3, activation="relu")(inputs)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=64, kernel_size=3, activation="relu")(x)
x = layers.MaxPooling2D(pool_size=2)(x)
x = layers.Conv2D(filters=128, kernel_size=3, activation="relu")(x)
x = layers.Flatten()(x)
outputs = layers.Dense(10, activation="softmax")(x)  # 10-way softmax classifier
model = keras.Model(inputs=inputs, outputs=outputs)
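# Illustrative: print the layer-by-layer output shapes of the convnet.
model.summary()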
# Answer key
# P1 → 2 → B
# P2 → 3 → A
# P3 → 1 → C
# Nos. 4, 5
# Nos. 1, 4, 6
# Nos. 1, 2, 5
# 1 → sigmoid → binary_crossentropy