Name | 신병춘
Subject | Neural network file: 1-layer (Inet1), 2-layer (Inet2)
# ShinNet_relu.py
# Inet1(input_size, output_size) : one-layer NN
#     z = x dot W1 + b1                 => E = softmax_loss(z)
# Inet2(input_size, hidden_size, output_size) : two-layer NN
#     y1 = x dot W1 + b1
#     y2 = relu(y1)
#     z  = y2 dot W2 + b2               => E = softmax_loss(z)
import numpy as np
# Basic functions: activation and loss functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def relu_grad(x):
    grad = np.zeros_like(x)
    grad[x >= 0] = 1
    return grad
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)       # subtract the column max to guard against overflow
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T
    else:
        x = x - np.max(x)               # guard against overflow
        return np.exp(x) / np.sum(np.exp(x))

def CEE(y, t):  # cross-entropy error; t is an index-type label (not one-hot)
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

def softmax_loss(X, t):  # SoftmaxWithLoss
    y = softmax(X)
    return CEE(y, t)
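# Sanity-check sketch: the gradient used below in Inet1.grad() / Inet2.grad() is
#     dE/dz = softmax(z) - one_hot(t)   (averaged over the batch),
# where E(z, t) = softmax_loss(z, t).  This helper is illustrative only (the classes
# do not use it); call it manually to compare the formula with a numerical difference.
def _check_softmax_loss_grad(eps=1e-6):
    z = np.array([[0.2, -0.1, 0.5],
                  [1.0,  0.3, -0.2]])
    t = np.array([2, 0])                        # index-type labels
    batch_size = z.shape[0]
    analytic = softmax(z)                       # softmax(z) - one_hot(t), then / batch_size
    analytic[np.arange(batch_size), t] -= 1
    analytic /= batch_size
    numeric = np.zeros_like(z)
    for i, j in np.ndindex(*z.shape):           # central difference for each z[i, j]
        dz = np.zeros_like(z)
        dz[i, j] = eps
        numeric[i, j] = (softmax_loss(z + dz, t) - softmax_loss(z - dz, t)) / (2 * eps)
    return np.max(np.abs(analytic - numeric))   # should be close to zero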
class Inet1:  # one-layer NN
    """One layer Neural Network"""

    def __init__(self, i_size, o_size):
        init_std = 0.01
        self.params = {}
        self.params['W1'] = init_std * np.random.randn(i_size, o_size)
        self.params['b1'] = np.zeros(o_size)

    def predict(self, x):
        z = np.dot(x, self.params['W1']) + self.params['b1']
        return z

    def loss(self, x, t):
        z = self.predict(x)
        return softmax_loss(z, t)

    def classifying(self, x):
        z = self.predict(x)
        if z.ndim == 1:
            return np.argmax(z).astype(np.uint8)
        else:
            return np.argmax(z, axis=1).astype(np.uint8)

    def accuracy(self, x, t):  # t is an index-type label
        z = self.classifying(x)
        accuracy = np.mean(z == t)
        return accuracy

    def grad(self, xo, t):
        x = xo.copy()
        if x.ndim == 1:
            x = x.reshape(1, -1)
        z = np.dot(x, self.params['W1']) + self.params['b1']
        Grad = {}
        batch_size = x.shape[0]
        # dout = dE/dz where E(z, t) = SoftmaxWithLoss(z, t)
        # dE/dz = softmax(z) - one_hot(t)
        dout = softmax(z)
        idi, idj = np.arange(batch_size), t
        dout[idi, idj] = dout[idi, idj] - 1
        dout = dout / batch_size
        # dE/dW1, dE/db1 : z = x dot W1 + b1
        Grad['W1'] = np.dot(x.T, dout)
        Grad['b1'] = np.sum(dout, axis=0)
        return Grad

    def train(self, X_train, t_train, lr, batch_size=100):
        train_size = X_train.shape[0]
        itr_per_epoch = max(train_size // batch_size, 1)
        for k in range(itr_per_epoch):
            batch_mask = np.random.choice(train_size, batch_size)
            X_batch = X_train[batch_mask]
            t_batch = t_train[batch_mask]
            # compute the gradient by backpropagation (much faster than a numerical gradient)
            Grad = self.grad(X_batch, t_batch)
            # update the parameters
            for key in Grad:
                self.params[key] -= lr * Grad[key]
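# Illustrative gradient check for Inet1 (a sketch; the helper name is made up here and
# is not used by the classes): compares Inet1.grad() with a numerical gradient of
# Inet1.loss() on a tiny random batch.
def _check_inet1_grad(eps=1e-5):
    np.random.seed(0)
    net = Inet1(4, 3)
    x = np.random.randn(5, 4)
    t = np.random.randint(0, 3, size=5)
    analytic = net.grad(x, t)
    max_diff = 0.0
    for key in ('W1', 'b1'):
        p = net.params[key]
        numeric = np.zeros_like(p)
        for idx in np.ndindex(*p.shape):
            saved = p[idx]
            p[idx] = saved + eps
            loss_plus = net.loss(x, t)
            p[idx] = saved - eps
            loss_minus = net.loss(x, t)
            p[idx] = saved                      # restore the parameter
            numeric[idx] = (loss_plus - loss_minus) / (2 * eps)
        max_diff = max(max_diff, np.max(np.abs(numeric - analytic[key])))
    return max_diff                             # should be close to zero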
class Inet2:  # two-layer NN
    """Two layer Neural Network"""

    def __init__(self, i_size, h_size, o_size):
        init_std = 0.01
        self.params = {}
        self.params['W1'] = init_std * np.random.randn(i_size, h_size)
        self.params['b1'] = np.zeros(h_size)
        self.params['W2'] = init_std * np.random.randn(h_size, o_size)
        self.params['b2'] = np.zeros(o_size)

    def predict(self, x):
        y1 = np.dot(x, self.params['W1']) + self.params['b1']
        y2 = relu(y1)
        z = np.dot(y2, self.params['W2']) + self.params['b2']
        return z

    def loss(self, x, t):
        z = self.predict(x)
        return softmax_loss(z, t)

    def classifying(self, x):
        z = self.predict(x)
        if z.ndim == 1:
            return np.argmax(z).astype(np.uint8)
        else:
            return np.argmax(z, axis=1).astype(np.uint8)

    def accuracy(self, x, t):  # t is an index-type label
        z = self.classifying(x)
        accuracy = np.mean(z == t)
        return accuracy

    def grad(self, xo, t):
        x = xo.copy()
        if x.ndim == 1:
            x = x.reshape(1, -1)  # reshape(n, -1): -1 lets the size be inferred automatically
        # forward pass
        y1 = np.dot(x, self.params['W1']) + self.params['b1']
        y2 = relu(y1)
        z = np.dot(y2, self.params['W2']) + self.params['b2']
        Z = softmax(z)
        Grad = {}
        batch_size = x.shape[0]
        # dout = dE/dz where E(z, t) = SoftmaxWithLoss(z, t)
        # dE/dz = softmax(z) - one_hot(t)
        dout = Z.copy()
        idi, idj = np.arange(batch_size), t
        dout[idi, idj] = dout[idi, idj] - 1
        dout = dout / batch_size
        # dE/dW2, dE/db2, dE/dy2 : z = y2 dot W2 + b2
        Grad['W2'] = np.dot(y2.T, dout)
        Grad['b2'] = np.sum(dout, axis=0)
        dout = np.dot(dout, self.params['W2'].T)
        # dE/dy1 : y2 = relu(y1)
        dout = dout * relu_grad(y1)
        # dE/dW1, dE/db1 : y1 = x dot W1 + b1
        Grad['W1'] = np.dot(x.T, dout)
        Grad['b1'] = np.sum(dout, axis=0)
        return Grad

    def train(self, X_train, t_train, lr, batch_size=100):
        train_size = X_train.shape[0]
        itr_per_epoch = max(train_size // batch_size, 1)
        for k in range(itr_per_epoch):
            batch_mask = np.random.choice(train_size, batch_size)
            X_batch = X_train[batch_mask]
            t_batch = t_train[batch_mask]
            # compute the gradient by backpropagation (much faster than a numerical gradient)
            Grad = self.grad(X_batch, t_batch)
            # update the parameters
            for key in Grad:
                self.params[key] -= lr * Grad[key]
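A minimal usage sketch follows (a hypothetical driver script: the synthetic data, the learning rate, and the layer sizes are illustrative stand-ins, not values from this file):

# demo_shinnet.py -- hypothetical usage sketch for ShinNet_relu.py
import numpy as np
from ShinNet_relu import Inet1, Inet2

np.random.seed(1)
# synthetic stand-in data: 1000 samples, 20 features, 5 linearly generated classes
X = np.random.randn(1000, 20)
t = np.argmax(np.dot(X, np.random.randn(20, 5)), axis=1)

net1 = Inet1(20, 5)            # one-layer net
net2 = Inet2(20, 50, 5)        # two-layer net with 50 hidden units
for epoch in range(20):        # each train() call runs one epoch of mini-batches
    net1.train(X, t, lr=0.1, batch_size=100)
    net2.train(X, t, lr=0.1, batch_size=100)

print('Inet1 accuracy:', net1.accuracy(X, t))
print('Inet2 accuracy:', net2.accuracy(X, t))

Note that train() draws each batch_mask with np.random.choice, which samples with replacement by default, so an "epoch" here means train_size // batch_size random batches rather than a strict pass over every sample.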
LAST UPDATE: 2022.06.08 - 14:45