2회차 | Notion

정리본 Linear Regression을 이용한 예측
# Linear regression, one gradient-descent step written out line by line.
import torch

# Data definition: one sample per row, so both tensors have shape (3, 1).
x_train = torch.FloatTensor([[1], [2], [3]])  # input  - hours studied
y_train = torch.FloatTensor([[2], [4], [6]])  # output - score

# Define the hypothesis.
# Weight and bias are initialized to zero, so the model predicts 0 for any input.
# requires_grad=True marks a tensor as something to be learned.
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

hypothesis = x_train * W + b

# Compute loss: how close the prediction is to the correct answer.
# For linear regression the loss is measured with the mean squared error (MSE).
cost = torch.mean((hypothesis - y_train) ** 2)

# Improve the model using the loss.
# torch.optim provides SGD; it is given the tensors to learn (W, b) and the
# learning rate lr.
optimizer = torch.optim.SGD([W, b], lr=0.01)

optimizer.zero_grad()  # reset gradients
cost.backward()        # compute gradients
optimizer.step()       # perform one gradient-descent update

------
# Training data: three (input, target) pairs stored as (3, 1) float32 tensors.
x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[2], [4], [6]], dtype=torch.float32)

# Learnable parameters, both starting from zero.
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optimizer = torch.optim.SGD([W, b], lr=0.01)

nb_epochs = 1000

for epoch in range(1, nb_epochs + 1):  # repeat the training step
    # Clear the gradients left over from the previous iteration.
    optimizer.zero_grad()

    # Forward pass: linear hypothesis and its mean squared error.
    hypothesis = W * x_train + b
    cost = (hypothesis - y_train).pow(2).mean()

    # Backward pass, then one SGD update of W and b.
    cost.backward()
    optimizer.step()

----

# Simpler hypothesis function
# H(x) = Wx
# no bias

x_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)
y_train = torch.tensor([[1], [2], [3]], dtype=torch.float32)

# H(x) = x is the most accurate model for this data,
# so W = 1 is the best value and the goal of training is to make W converge to 1.
# cost = 0 when W = 1; the farther W is from 1, the higher the cost.
# Cost function: MSE

# Minimizing the cost function:
# when the slope of the tangent to the cost-vs-W curve is negative, W has to be
# increased; when it is positive, W has to be decreased.
# The steeper the slope, the larger the cost, so W has to change by more.
# The slope of the tangent is the gradient -> gradient descent.

W = torch.zeros(1)  # model initialization
lr = 0.1            # learning rate
nb_epochs = 10

for epoch in range(1, nb_epochs + 1):  # repeat the training step
    hypothesis = x_train * W
    cost = (hypothesis - y_train).pow(2).mean()
    # Hand-written gradient in sum form. For these three samples it equals 3/2
    # times the exact derivative of the mean squared cost, so it has the same
    # sign and only rescales the step size.
    gradient = ((W * x_train - y_train) * x_train).sum()

    print(
        "Epoch {:4d}/{} W: {:.3f}, Cost: {:6f}".format(
            epoch, nb_epochs, W.item(), cost.item()
        )
    )

    # Improve H by stepping W against the gradient (in-place update).
    W.sub_(lr * gradient)

# torch.optim 으로도 gradient descent 가능
	
___
# Simpler hypothesis function, trained with torch.optim
# H(x) = Wx
# no bias

x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

W = torch.zeros(1, requires_grad=True)  # model initialization
optimizer = torch.optim.SGD([W], lr=0.15)
nb_epochs = 10

for epoch in range(1, nb_epochs + 1):  # repeat the training step
    hypothesis = x_train * W
    cost = torch.mean((hypothesis - y_train) ** 2)

    print("Epoch {:4d}/{} W: {:.3f}, Cost: {:6f}".format(epoch, nb_epochs, W.item(), cost.item()))

    # Improve H using the gradient of the cost. cost.backward() computes the
    # gradient through autograd, so no hand-written gradient expression is
    # needed in this version.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

# 하나의 정보로부터 추측하는 모델

--
# Multivariate Linear Regression
# 복수의 정보가 존재할 때 하나의 추측 값을 계산
# Hypothesis Function - Matrix
# hypothesis = x_train.matmul(W) + b
# 여러 개의 x일 때 (x1, x2, x3)
# cost - 기존 simple 과 동일
# 학습 방식도 동일
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Five samples with three input values each, and one target value per sample.
x_train = torch.tensor(
    [
        [73, 80, 75],
        [93, 88, 93],
        [89, 91, 90],
        [96, 98, 100],
        [73, 66, 70],
    ],
    dtype=torch.float32,
)
y_train = torch.tensor([[152], [185], [180], [196], [142]], dtype=torch.float32)

# Model initialization: a (3, 1) weight matrix and a scalar bias, all zeros.
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Optimizer setup
optimizer = optim.SGD([W, b], lr=1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):  # epochs 0..nb_epochs inclusive

    # Compute H(x); `@` is the operator form of matmul.
    hypothesis = x_train @ W + b

    # Compute the cost (mean squared error).
    cost = (hypothesis - y_train).pow(2).mean()

    # Improve H(x) using the cost.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log on every epoch; the printed hypothesis and cost were computed
    # before this epoch's parameter update.
    print('Epoch {:4d}/{} hypothesis: {} Cost: {:.6f}'.format(
        epoch, nb_epochs, hypothesis.detach().squeeze(), cost.item()
    ))

    # The cost is expected to get smaller as the epochs progress.

---
# nn.Module
# W와 b를 대신 해서 써줌
# F.mse_loss 
# 쉽게 다른 loss와 교체 가능
---
# Data in the Real World
# 복잡한 머신러닝 모델을 학습하려면 엄청난 양의 데이터 필요
# Problem - 엄청 많은 양의 데이터는 물리적 한계 때문에 한 번에 학습하지 못할 수도 있다
# 모든 데이터를 한번에 학습하는 것이 아니라 일부만 사용
# Minibatch Gradient Descent 
# 전체 데이터를 균일하게 나눠서 학습하자 / 토막내기
# 업데이트를 좀 더 빠르게 할 수 있다
# 잘못된 방향으로 학습 할 수 있다
# 거칠게 gradient가 줄어든다

# __len__() - 데이터셋의 총 데이터 수
# __getitem__() - idx를 받았을 때 상응하는 입출력 데이터 반환

# batch_size = 2 - 각 minibatch의 크기, 주로 2의 거듭제곱
# shuffle = True - 학습되는 순서를 바꾼다
# enumerate(dataloader)

--

# Logistic Regression
# 문제 정의 : binary classification problem 
# x  = m개*d(차원) - m개의 0과 1로 이루어진 정답을 구할 수 있도록 - binary
# P(x=1) = 1-P(x=0)
# Weight - d*1
# Weight update via Gradient Descent