Question: ValueError when using the @tf.function decorator

Posted 21.01.15 17:36 · 147 views


Hello! Thank you for the great course.

I defined trainer(), validation(), and tester() functions to train the model. Training works fine without the decorator, but as soon as I add @tf.function to them, a ValueError is raised. The same issue occurs both on my home machine and on Colab.

However, if I replace the code after for epoch in range(EPOCHS): with

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_ds, validation_data=validation_ds, epochs=EPOCHS)

and train through model.compile / model.fit instead, everything works fine even with the @tf.function decorator in place.

What is wrong in my code below?? Also, not only the training data but also the validation and test data show 100% prediction accuracy, so I'm not even sure the training was done correctly. Could you please take a look at the code?

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-896c4f98ef4d> in <module>()
    120 
    121 for epoch in range(EPOCHS):
--> 122     trainer()
    123     validation()
    124 


8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    975           except Exception as e:  # pylint:disable=broad-except
    976             if hasattr(e, "ag_error_metadata"):
--> 977               raise e.ag_error_metadata.to_exception(e)
    978             else:
    979               raise

ValueError: in user code:

    <ipython-input-1-896c4f98ef4d>:91 trainer  *
        for images, labels in train_ds:
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/operators/control_flow.py:424 for_stmt
        iter_, extra_test, body, get_state, set_state, symbol_names, opts)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/operators/control_flow.py:719 _tf_dataset_for_stmt
        _verify_loop_init_vars(init_vars, symbol_names)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/operators/control_flow.py:193 _verify_loop_init_vars
        raise ValueError(error_msg)

    ValueError: 'predictions' must be defined before the loop.
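
If I'm reading the message correctly, AutoGraph converts the dataset loop into a graph loop when @tf.function is applied, so any variable that is read after the loop (here predictions, labels, and loss) has to exist before the loop starts; plain eager Python doesn't care, which would explain why it only fails with the decorator. If that's right, moving the metric updates inside the loop body should make the error go away. A sketch of what I mean for my trainer() (just my guess, not verified):

@tf.function
def trainer():
    for images, labels in train_ds:
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # update the running metrics once per batch, inside the loop,
        # so nothing created in the loop is read after it ends
        loss_train(loss)
        acc_train(labels, predictions)

Does that look like the right direction?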




--- Full code below ---

import numpy as np
import matplotlib.pyplot as plt
from termcolor import colored

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.metrics import Mean, SparseCategoricalAccuracy

# data preprocessing
def get_mnist_dataset_and_normalize(ratio, n_batch_train, n_batch_test):
    
    def normalization(images, labels):
        images = tf.cast(images, tf.float32)/255
        labels = tf.cast(labels, tf.int32)
        return images, labels
    
    (train_validation_ds, test_ds), ds_info = tfds.load(name='mnist', as_supervised=True, shuffle_files=True,
                                                        with_info=True, split=['train', 'test'])

    n_train_validation = ds_info.splits['train'].num_examples
    n_train = int(ratio * n_train_validation)
    n_validation = n_train_validation - n_train

    train_ds = train_validation_ds.take(n_train)
    validation_ds = train_validation_ds.skip(n_train)

    train_ds = train_ds.map(normalization).shuffle(1000).batch(n_batch_train)
    validation_ds = validation_ds.map(normalization).batch(n_batch_train)
    test_ds = test_ds.map(normalization).batch(n_batch_test)
    return train_ds, validation_ds, test_ds


# hyperparameter definitions
EPOCHS = 10
n_batch_train = 32
n_batch_test = 32
ratio = 0.8
LR = 0.001

# load the data
train_ds, validation_ds, test_ds = get_mnist_dataset_and_normalize(ratio, n_batch_train, n_batch_test)

class CNN_Model(Model):
    def __init__(self):
        super(CNN_Model, self).__init__()
        
        # feature extractor
        self.conv1 = Conv2D(filters=8, kernel_size=5, padding='same', activation='relu')
        self.conv1_maxpool = MaxPooling2D(pool_size=2, strides=2)
        self.conv2 = Conv2D(filters=8, kernel_size=5, padding='same', activation='relu')
        self.conv2_maxpool = MaxPooling2D(pool_size=2, strides=2)
        
        # Classifier
        self.flatten = Flatten()
        self.dense1 = Dense(units=64, activation='relu')
        self.dense1_dropout = Dropout(0.5)
        self.dense2 = Dense(units=10, activation='softmax')
        
    def call(self, x):
        x = self.conv1(x)
        x = self.conv1_maxpool(x)
        x = self.conv2(x)
        x = self.conv2_maxpool(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense1_dropout(x)
        x = self.dense2(x)
        return x

    
# define the various instances
model = CNN_Model()
loss_object = SparseCategoricalCrossentropy()
optimizer = Adam(learning_rate=LR)

loss_train = Mean()
loss_validation = Mean()
loss_test = Mean()

acc_train = SparseCategoricalAccuracy()
acc_validation = SparseCategoricalAccuracy()
acc_test = SparseCategoricalAccuracy()

@tf.function
def trainer(): # trains the model on the training data within one epoch
    global model, loss_object, loss_train, acc_train, optimizer
    for images, labels in train_ds:
        with tf.GradientTape() as tape:
            predictions = model(images)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    loss_train(loss)
    acc_train(labels, predictions)
    
@tf.function
def validation(): # evaluates the model on the validation data within one epoch
    global model, loss_object, loss_validation, acc_validation
    for images, labels in validation_ds:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    
    loss_validation(loss)
    acc_validation(labels, predictions)
    
@tf.function
def tester(): # evaluates the model on the test data
    global model, loss_object, loss_test, acc_test
    for images, labels in test_ds:
        predictions = model(images)
        loss = loss_object(labels, predictions)
    
    loss_test(loss)
    acc_test(labels, predictions)
    
for epoch in range(EPOCHS):
    trainer()
    validation()
    
    print(colored('Epoch','red','on_white'), epoch+1)
    print("Train Loss : {:.4f} / Train Accuracy : {:.2f}".format(loss_train.result(), acc_train.result()*100))
    print("Validation Loss : {:.4f} / Validation Accuracy : {:.2f}".format(loss_validation.result(), acc_validation.result()*100))
    
    loss_train.reset_states()
    loss_validation.reset_states()
    acc_train.reset_states()
    acc_validation.reset_states()

tester()
print(colored('TEST','cyan','on_white'))
print("Test Loss : {:.4f} / Test Accuracy : {:.2f}".format(loss_test.result(), acc_test.result()*100))

Below is the output when I remove @tf.function and run the code. Training proceeds normally, but the Validation Accuracy and Test Accuracy come out as 100%... is MNIST-level data really this easy to fit??
=================
Epoch 1
Train Loss : 0.2663 / Train Accuracy : 96.88
Validation Loss : 0.1054 / Validation Accuracy : 96.88
Epoch 2
Train Loss : 0.0721 / Train Accuracy : 100.00
Validation Loss : 0.0400 / Validation Accuracy : 100.00
Epoch 3
Train Loss : 0.0665 / Train Accuracy : 100.00
Validation Loss : 0.0373 / Validation Accuracy : 100.00
Epoch 4
Train Loss : 0.0575 / Train Accuracy : 96.88
Validation Loss : 0.0157 / Validation Accuracy : 100.00
Epoch 5
Train Loss : 0.1948 / Train Accuracy : 90.62
Validation Loss : 0.0192 / Validation Accuracy : 100.00
Epoch 6
Train Loss : 0.0520 / Train Accuracy : 96.88
Validation Loss : 0.0250 / Validation Accuracy : 100.00
Epoch 7
Train Loss : 0.0365 / Train Accuracy : 100.00
Validation Loss : 0.0134 / Validation Accuracy : 100.00
Epoch 8
Train Loss : 0.0342 / Train Accuracy : 100.00
Validation Loss : 0.0166 / Validation Accuracy : 100.00
Epoch 9
Train Loss : 0.0837 / Train Accuracy : 96.88
Validation Loss : 0.0062 / Validation Accuracy : 100.00
Epoch 10
Train Loss : 0.0166 / Train Accuracy : 100.00
Validation Loss : 0.0112 / Validation Accuracy : 100.00
TEST
Test Loss : 0.0096 / Test Accuracy : 100.00
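
Looking at these numbers again, every accuracy value is a multiple of 1/32 (90.62 = 29/32, 96.88 = 31/32, 100.00 = 32/32), which is exactly the batch size. So I suspect the metrics are being computed from the last batch of 32 samples only, because loss_train(loss) and acc_train(labels, predictions) sit outside the for loop (and the same for validation and test). If that's the case, the same inside-the-loop fix sketched above should average over the whole dataset instead; e.g. for validation() (again, my guess):

@tf.function
def validation():
    for images, labels in validation_ds:
        predictions = model(images)
        loss = loss_object(labels, predictions)
        # accumulate per batch so result() averages over the
        # whole validation set, not just the final batch
        loss_validation(loss)
        acc_validation(labels, predictions)

Would that explain the suspicious 100% numbers?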


Answers: 0
