1. Pengantar PyTorch
PyTorch adalah framework Deep Learning open-source yang dikembangkan oleh Meta AI (Facebook). PyTorch menjadi sangat populer di kalangan peneliti dan industri berkat API-nya yang Pythonic (terasa seperti menulis Python biasa), dukungan dynamic computation graph, dan ekosistem yang sangat kaya.
Mengapa PyTorch?
| Keunggulan | Penjelasan |
|---|---|
| 🟢 Dynamic Graph | Graph komputasi dibangun saat runtime — mudah debug dan fleksibel |
| 🟢 Pythonic API | Terasa seperti NumPy — mudah dipelajari jika sudah tahu Python |
| 🟢 GPU Support | Komputasi di GPU semudah memanggil .cuda() atau .to(device) |
| 🟢 Ekosistem Kaya | TorchVision, TorchText, TorchAudio, HuggingFace — semua pakai PyTorch |
| 🟢 Riset & Industri | Standar di hampir semua lab riset AI dan banyak perusahaan besar |
| 🟢 Debugging Mudah | Bisa pakai Python debugger (pdb) langsung karena eager execution |
PyTorch vs TensorFlow
| Aspek | PyTorch | TensorFlow |
|---|---|---|
| Kemudahan | 🟢 Lebih Pythonic | 🟡 API lebih verbose |
| Graph | Dynamic (eager) | Static → Dynamic (TF2) |
| Debugging | 🟢 Standard Python debugger | 🟡 TFDBG khusus |
| Deployment | TorchServe, ONNX | 🟢 TF Lite, TF Serving, TF.js |
| Mobile | PyTorch Mobile / ExecuTorch | 🟢 TF Lite (lebih matang) |
| Riset | 🟢 Dominan di riset | 🟡 Banyak digunakan industri |
Instalasi
# Install PyTorch (CPU-only build). The explicit CPU index is required because
# the default PyPI wheel on Linux ships with the CUDA libraries bundled.
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# Install with CUDA support (NVIDIA GPU); the cu121 index serves CUDA 12.1 wheels
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# Verify the installation
python -c "import torch; print(f'PyTorch {torch.__version__}')"
python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
# In a Jupyter Notebook:
# !pip install torch torchvision
2. Tensors: Fondasi PyTorch
Tensor adalah struktur data utama di PyTorch — mirip dengan NumPy array, tapi dengan dua keunggulan besar: bisa berjalan di GPU dan mendukung automatic differentiation (autograd). Semua data di PyTorch direpresentasikan sebagai tensor.
Membuat Tensor
import torch
# ===== FROM DATA =====
# 1-D tensor built from a plain Python list
t1 = torch.tensor([1, 2, 3, 4, 5])
print(f"Dari list: {t1}, dtype: {t1.dtype}")

# Nested lists become a 2-D tensor (matrix)
t2d = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(f"2D: shape={t2d.shape}, ndim={t2d.ndim}")

# Same integer values, but with an explicitly requested element type
int_values = [1, 2, 3]
t_float = torch.tensor(int_values, dtype=torch.float32)
t_double = torch.tensor(int_values, dtype=torch.float64)

# ===== SPECIAL TENSORS =====
zeros = torch.zeros((3, 4))  # 3x4 matrix of zeros
ones = torch.ones((2, 3))  # 2x3 matrix of ones
eye = torch.eye(4)  # 4x4 identity matrix
full = torch.full(size=(2, 3), fill_value=7.0)  # 2x3 matrix filled with 7.0
arange = torch.arange(start=0, end=10, step=2)  # [0, 2, 4, 6, 8]
linspace = torch.linspace(start=0, end=1, steps=5)  # [0, 0.25, 0.5, 0.75, 1.0]

# ===== RANDOM TENSORS =====
# Seed first so the three draws below are reproducible.
torch.manual_seed(42)
rand_uniform = torch.rand((3, 3))  # uniform on [0, 1)
rand_normal = torch.randn((3, 3))  # standard normal (mean=0, std=1)
rand_int = torch.randint(low=0, high=100, size=(5,))  # integers in [0, 100)
print(f"\nRandom uniform:\n{rand_uniform}")
print(f"\nRandom normal:\n{rand_normal}")
print(f"\nRandom int: {rand_int}")

# ===== PROPERTIES =====
t = torch.randn((3, 4, 5))
print(f"\nShape: {t.shape}")
print(f"Size: {t.size()}")
print(f"Dtype: {t.dtype}")
print(f"Device: {t.device}")
print(f"Ndim: {t.ndim}")
print(f"Numel: {t.numel()}")  # total number of elements
Operasi Tensor
import torch
a = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
b = torch.tensor([5.0, 4.0, 3.0, 2.0, 1.0])

# ===== ARITHMETIC (element-wise) =====
print(f"Penjumlahan: {a + b}")  # tensor([6., 6., 6., 6., 6.])
print(f"Pengurangan: {a - b}")  # tensor([-4., -2., 0., 2., 4.])
print(f"Perkalian: {a * b}")  # tensor([5., 8., 9., 8., 5.])
print(f"Pembagian: {a / b}")  # tensor([0.2, 0.5, 1., 2., 5.])
print(f"Pangkat: {a ** 2}")  # tensor([1., 4., 9., 16., 25.])

# ===== MATH OPERATIONS =====
print(f"\nMean: {a.mean()}")
print(f"Sum: {a.sum()}")
print(f"Max: {a.max()}")
print(f"Min: {a.min()}")
print(f"Std: {a.std()}")
print(f"Sqrt: {torch.sqrt(a)}")
print(f"Exp: {torch.exp(a)}")
print(f"Log: {torch.log(a)}")
print(f"Abs: {torch.abs(a - 3)}")

# ===== MATRIX OPERATIONS =====
m1 = torch.randn(2, 3)
m2 = torch.randn(3, 4)
# Matrix multiplication: inner dimensions (3 and 3) must match
matmul = torch.matmul(m1, m2)  # or: m1 @ m2
print(f"\nMatmul (2x3) @ (3x4) = {matmul.shape}")  # [2, 4]
# Transpose
print(f"Transpose: {m1.T.shape}")  # or m1.t()
# Dot product of two 1-D vectors: 1*4 + 2*5 + 3*6
v1 = torch.tensor([1.0, 2.0, 3.0])
v2 = torch.tensor([4.0, 5.0, 6.0])
print(f"Dot product: {torch.dot(v1, v2)}")  # 32.0

# ===== RESHAPING =====
t = torch.arange(12)
print(f"\nOriginal: {t}")  # tensor([0,1,...,11])
print(f"Reshape 3x4: {t.reshape(3, 4)}")
print(f"View 4x3: {t.view(4, 3)}")
# Flatten the reshaped `t` (not an unrelated random tensor) so the output is
# deterministic and visibly undoes the reshape shown above.
flat = t.reshape(3, 4).flatten()
print(f"Flatten: {flat}")
print(f"Unsqueeze (add dim): {t.unsqueeze(0).shape}")  # [1, 12]
print(f"Unsqueeze (dim=1): {t.unsqueeze(1).shape}")  # [12, 1]

# ===== INDEXING & SLICING (same syntax as NumPy) =====
matrix = torch.arange(12).reshape(3, 4)
print(f"\nMatrix:\n{matrix}")
print(f"Baris 0: {matrix[0]}")
print(f"Kolom 1: {matrix[:, 1]}")
print(f"Sub-matrix: {matrix[0:2, 1:3]}")
# Boolean masking: indexing with a bool tensor returns the selected elements as 1-D
mask = matrix > 5
print(f"Mask:\n{mask}")
print(f"Filtered: {matrix[mask]}")  # tensor([6, 7, 8, 9, 10, 11])
Tensor ↔ NumPy Conversion
import torch
import numpy as np
# NumPy -> Tensor
np_array = np.array([1, 2, 3, 4, 5])
tensor_from_np = torch.from_numpy(np_array)  # shares the array's memory
tensor_from_np2 = torch.tensor(np_array)  # copies the data, no shared memory

# Tensor -> NumPy
t = torch.tensor([10, 20, 30, 40, 50])
np_from_tensor = t.numpy()  # shares the tensor's memory

# WARNING: torch.from_numpy and .numpy() SHARE memory with their source,
# so this in-place write to the array is visible through `tensor_from_np`.
np_array[0] = 999
print(f"Tensor ikut berubah: {tensor_from_np}")  # tensor([999, 2, 3, 4, 5])

# torch.tensor(...) gives an independent copy of the array's current contents
tensor_copy = torch.tensor(np_array)
GPU Acceleration
import torch
# Check GPU availability
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Device details are only queried when a CUDA device is present.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Jumlah GPU: {torch.cuda.device_count()}")

# Device management: pick CUDA when available, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Move a tensor to the selected device
t = torch.randn(1000, 1000)
t_gpu = t.to(device)  # preferred: works for both CPU and GPU
# Alternative shortcut (CUDA only): t.cuda()

# The operation runs on whichever device holds the operands
result = torch.matmul(t_gpu, t_gpu.T)
print(f"Result device: {result.device}")

# Back to CPU
result_cpu = result.cpu()
# NumPy conversion needs a CPU tensor; reuse the CPU copy made above instead
# of transferring the result a second time.
result_np = result_cpu.numpy()
3. Autograd: Automatic Differentiation
Autograd adalah fitur paling penting yang membedakan tensor PyTorch dari NumPy array. Autograd secara otomatis menghitung gradien (turunan) — yang merupakan inti dari proses training neural network melalui backpropagation.
Konsep Dasar Gradient
Dalam Deep Learning, kita ingin meminimalkan loss function. Untuk itu, kita perlu tahu seberapa besar perubahan loss jika bobot model diubah sedikit — itulah gradien. Autograd menghitung gradien ini secara otomatis menggunakan teknik yang disebut computational graph.
import torch
# ===== TENSOR WITH GRADIENT TRACKING =====
x = torch.tensor(3.0).requires_grad_()  # enable gradient tracking on the leaf
print(f"x = {x}")

# Simple function: y = x^2 + 2x + 1
y = x.pow(2) + 2 * x + 1
print(f"y = x² + 2x + 1 = {y}")  # 9 + 6 + 1 = 16

# backward() fills x.grad with dy/dx = 2x + 2 = 8 (at x = 3)
y.backward()
print(f"dy/dx pada x=3: {x.grad}")  # tensor(8.)

# ===== A MORE COMPLEX EXAMPLE =====
x = torch.tensor(2.0, requires_grad=True)
w = torch.tensor(4.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)

# Forward pass: y = w*x + b, loss = y^2
y_pred = w * x + b  # 4*2 + 1 = 9
loss = y_pred.pow(2)  # 9^2 = 81

# Backward pass populates .grad on every leaf that requires grad
loss.backward()
print(f"\ny_pred = w*x + b = {y_pred}")
print(f"loss = y² = {loss}")
print(f"d(loss)/dw = {w.grad}")  # 2 * y * x = 2 * 9 * 2 = 36
print(f"d(loss)/dx = {x.grad}")  # 2 * y * w = 2 * 9 * 4 = 72
print(f"d(loss)/db = {b.grad}")  # 2 * y * 1 = 2 * 9 = 18
Computational Graph
import torch
# PyTorch builds the graph dynamically
a = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)
# Every operation is appended to the graph
c = a + b  # c = 5
d = c * a  # d = 10
e = d ** 2  # e = 100
# Gradients are computed by applying the chain rule backwards
e.backward()
print(f"a.grad = {a.grad}")  # de/da = 140
print(f"b.grad = {b.grad}")  # de/db = 40
# Manual check. `a` reaches `d` along two paths (directly, and through c),
# so both contributions must be summed:
#   de/dd = 2d = 20
#   dd/da = c + a * dc/da = 5 + 2 * 1 = 7
#   dd/dc = a = 2
#   de/da = de/dd * dd/da = 20 * 7 = 140
#   de/db = de/dd * dd/dc * dc/db = 20 * 2 * 1 = 40

# ===== STOP GRADIENT TRACKING =====
# 1. .detach(): get a tensor that is cut off from the graph
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x * 2
z = y.detach()  # z is not connected to the graph
print(f"z requires_grad: {z.requires_grad}")  # False

# 2. torch.no_grad(): context manager for inference
with torch.no_grad():
    # Operations inside the block are not recorded for autograd.
    y = x * 2
    print(f"y requires_grad: {y.requires_grad}")  # False

# 3. .requires_grad_(False): switch tracking off in place
x.requires_grad_(False)

print("\n=== KAPAN GUNAKAN WHAT ===")
print("Training: requires_grad=True (hitung gradien)")
print("Inference: torch.no_grad() (hemat memori, lebih cepat)")
print("Freeze layer: param.requires_grad = False (tidak update bobot)")
Di PyTorch, gradien diakumulasi (ditambahkan ke nilai .grad yang sudah ada) setiap kali backward() dipanggil. Oleh karena itu, Anda harus memanggil optimizer.zero_grad() di setiap iterasi training sebelum loss.backward(). Jika lupa, gradien dari iterasi sebelumnya ikut menumpuk sehingga update bobot menjadi salah dan training bisa gagal konvergen!
4. nn.Module: Membangun Model
torch.nn adalah modul utama PyTorch untuk membangun neural network. Setiap model di PyTorch dibuat dengan mewarisi kelas nn.Module dan mendefinisikan layer di __init__ serta forward pass di forward().
Arsitektur Neural Network
import torch
import torch.nn as nn
# ===== CONTOH 1: Model Sederhana =====
class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of output values per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are registered as attributes so nn.Module tracks their parameters.
        self.fc1 = nn.Linear(input_size, hidden_size)  # fully connected layer 1
        self.relu = nn.ReLU()  # ReLU activation
        self.fc2 = nn.Linear(hidden_size, output_size)  # fully connected layer 2

    def forward(self, x):
        """Apply fc1, ReLU and fc2 in sequence and return the result."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
# Create a model instance
model = SimpleNet(input_size=10, hidden_size=64, output_size=1)
print(f"Model:\n{model}")

# Count parameters; "trainable" are those with requires_grad=True
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Inspect the parameters layer by layer
print("\nParameter per layer:")
for name, param in model.named_parameters():
    print(f" {name}: shape={param.shape}, requires_grad={param.requires_grad}")
Common Layers
| Layer | Fungsi | Contoh |
|---|---|---|
| nn.Linear(in, out) | Fully connected / Dense layer | nn.Linear(784, 256) |
| nn.Conv2d(in, out, k) | Convolutional layer (gambar) | nn.Conv2d(3, 16, 3) |
| nn.ReLU() | Aktivasi ReLU: max(0, x) | nn.ReLU() |
| nn.Sigmoid() | Aktivasi Sigmoid: output 0-1 | nn.Sigmoid() |
| nn.Softmax(dim) | Softmax untuk klasifikasi multi-kelas | nn.Softmax(dim=1) |
| nn.Dropout(p) | Regularisasi (random off neurons) | nn.Dropout(0.5) |
| nn.BatchNorm1d(n) | Batch normalization | nn.BatchNorm1d(256) |
| nn.Embedding(n, d) | Lookup table untuk NLP | nn.Embedding(10000, 128) |
nn.Sequential: Model Cepat
import torch
import torch.nn as nn
# ===== nn.Sequential: a quick model without a custom class =====
# The modules run in list order: 784 -> 256 -> 128 -> 10, with ReLU and
# dropout after each of the first two linear layers.
_mlp_layers = [
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(128, 10),
]
model_seq = nn.Sequential(*_mlp_layers)
print(f"Sequential model:\n{model_seq}")
# ===== MODEL LEBIH CANGGIH DENGAN nn.Module =====
class AdvancedNet(nn.Module):
    """MLP split into a feature extractor and a classifier head.

    The feature extractor maps 784 inputs to 256 features through two
    Linear -> BatchNorm1d -> ReLU -> Dropout stages; the classifier maps
    those 256 features to 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor
        self.features = nn.Sequential(
            nn.Linear(784, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
        )
        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Run ``x`` through the feature extractor, then the classifier."""
        x = self.features(x)
        x = self.classifier(x)
        return x
model_adv = AdvancedNet()

# Smoke-test the forward pass with one random batch: 32 samples, 784 features
batch_size, n_features = 32, 784
dummy_input = torch.randn(batch_size, n_features)
output = model_adv(dummy_input)
print(f"\nInput shape: {dummy_input.shape}")
print(f"Output shape: {output.shape}")  # [32, 10]
5. Training Loop: Melatih Model
Training loop adalah inti dari Deep Learning. Setiap iterasi terdiri dari: forward pass → hitung loss → backward pass → update bobot. Di PyTorch, Anda menulis loop ini secara eksplisit — yang membuatnya fleksibel dan transparan.
Anatomi Training Loop
import torch
import torch.nn as nn
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: module to train; switched to training mode.
        dataloader: iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: optimizer holding the model's parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch loss values
        and the percentage of samples whose arg-max prediction (over dim 1)
        equals the target. Both are ``0.0`` when no batches/samples were seen.
    """
    model.train()  # training mode (enables dropout, batchnorm updates)
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    for inputs, targets in dataloader:
        # 1. Move the batch to the target device (CPU/GPU)
        inputs = inputs.to(device)
        targets = targets.to(device)
        # 2. Reset gradients; they accumulate across backward() calls otherwise
        optimizer.zero_grad()
        # 3. Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # 4. Backward pass (compute gradients)
        loss.backward()
        # 5. Update weights
        optimizer.step()
        # 6. Track metrics
        total_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
        num_batches += 1
    # Count batches manually and guard the divisions so an empty or
    # length-less iterable does not raise.
    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = 100.0 * correct / total if total else 0.0
    return avg_loss, accuracy
def evaluate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: module to evaluate; switched to evaluation mode.
        dataloader: iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch loss values
        and the percentage of samples whose arg-max prediction (over dim 1)
        equals the target. Both are ``0.0`` when no batches/samples were seen.
    """
    model.eval()  # evaluation mode (disables dropout)
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():  # no gradients are needed during evaluation
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
            num_batches += 1
    # Count batches manually and guard the divisions so an empty or
    # length-less iterable does not raise.
    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = 100.0 * correct / total if total else 0.0
    return avg_loss, accuracy
Contoh Lengkap: Training MNIST
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
# ===== SETUP =====
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# Simulated MNIST-like data (swap in the real dataset for actual experiments)
np.random.seed(42)
X_train = np.random.randn(1000, 784).astype(np.float32)
# CrossEntropyLoss expects int64 class indices. np.random.randint can return
# int32 on some platforms, so cast explicitly.
y_train = np.random.randint(0, 10, 1000).astype(np.int64)
X_test = np.random.randn(200, 784).astype(np.float32)
y_test = np.random.randint(0, 10, 200).astype(np.int64)

# Wrap the arrays in datasets and batch them with DataLoader
train_dataset = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
test_dataset = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# ===== MODEL =====
class MNISTNet(nn.Module):
    """MLP classifier mapping 784 input features to 10 class logits.

    Architecture: 784 -> 256 -> 128 -> 10, with ReLU and dropout after each
    of the first two linear layers.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(784, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Return the output of the sequential stack for ``x``."""
        return self.net(x)
model = MNISTNet().to(device)

# ===== LOSS & OPTIMIZER =====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Halve the learning rate every 10 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# ===== TRAINING =====
num_epochs = 30
print(f"\n{'Epoch':>6} {'Train Loss':>12} {'Train Acc':>11} {'Test Loss':>11} {'Test Acc':>10} {'LR':>10}")
print("-" * 64)
for epoch in range(num_epochs):
    # Read the LR before the scheduler advances, so the printed value is the
    # one actually used for this epoch rather than the next epoch's.
    current_lr = scheduler.get_last_lr()[0]
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)
    # Step the scheduler once per epoch, after the optimizer updates.
    scheduler.step()
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"{epoch+1:>6} {train_loss:>12.4f} {train_acc:>10.1f}% {test_loss:>11.4f} {test_acc:>10.1f}% {current_lr:>10.6f}")
print("\nTraining selesai!")
print(f"Final test accuracy: {test_acc:.1f}%")
6. Loss Functions & Optimizers
Loss function mengukur seberapa jauh prediksi model dari nilai sebenarnya. Optimizer menentukan bagaimana bobot model diperbarui berdasarkan gradien yang dihitung.
Common Loss Functions
| Loss Function | Tugas | Kapan Digunakan |
|---|---|---|
| CrossEntropyLoss | Klasifikasi multi-kelas | Output = logits, target = class index |
| BCEWithLogitsLoss | Klasifikasi biner | Output = logit tunggal, target = 0/1 |
| MSELoss | Regresi | Output = prediksi kontinu, target = nilai aktual |
| L1Loss (MAE) | Regresi (robust terhadap outlier) | Seperti MSELoss tapi lebih tahan outlier |
| HuberLoss | Regresi | Kombinasi MSE + L1 — balanced |
Common Optimizers
| Optimizer | Karakteristik | Kapan Digunakan |
|---|---|---|
| SGD | Sederhana, perlu tuning lr | Klasik, sering dipakai + momentum |
| Adam | Adaptive learning rate, cepat konvergen | 🟢 Default paling umum — mulai dari sini |
| AdamW | Adam + weight decay yang benar | 🟢 Best practice untuk banyak tugas |
| RMSprop | Adaptive, bagus untuk RNN | Sequential data, RNN/LSTM |
import torch
import torch.nn as nn
import torch.optim as optim
# ===== LOSS FUNCTIONS =====
# CrossEntropyLoss: multi-class classification (logits in, class indices as targets)
ce_loss = nn.CrossEntropyLoss()
logits = torch.randn(3, 5)  # 3 samples, 5 classes
targets = torch.tensor([0, 3, 4])
loss = ce_loss(logits, targets)
print(f"CrossEntropy loss: {loss.item():.4f}")

# MSELoss: regression
mse_loss = nn.MSELoss()
predictions = torch.tensor([2.5, 0.0, 2.0, 8.0])
actuals = torch.tensor([3.0, -0.5, 2.0, 7.0])
loss = mse_loss(predictions, actuals)
print(f"MSE loss: {loss.item():.4f}")

# BCEWithLogitsLoss: binary classification (raw logits in, 0/1 float targets)
bce_loss = nn.BCEWithLogitsLoss()
logits_binary = torch.randn(5)
targets_binary = torch.tensor([1.0, 0.0, 1.0, 1.0, 0.0])
loss = bce_loss(logits_binary, targets_binary)
print(f"BCE loss: {loss.item():.4f}")

# ===== OPTIMIZER =====
model = nn.Linear(10, 2)
# Adam: the most popular choice
optimizer_adam = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
# SGD with momentum
optimizer_sgd = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# AdamW: modern best practice
optimizer_adamw = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# ===== LEARNING RATE SCHEDULER =====
# Each scheduler is bound to its own optimizer so the three examples do not
# all rewrite the learning rate of a single optimizer.
# StepLR: scale lr by gamma every `step_size` epochs
scheduler_step = optim.lr_scheduler.StepLR(optimizer_adam, step_size=10, gamma=0.5)
# ReduceLROnPlateau: reduce lr when the monitored metric stops improving
scheduler_plateau = optim.lr_scheduler.ReduceLROnPlateau(optimizer_sgd, patience=5, factor=0.5)
# CosineAnnealingLR: lr follows a cosine curve over T_max epochs
scheduler_cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer_adamw, T_max=50)

# Example scheduler usage
for epoch in range(30):
    # ... training code ...
    # optimizer.step() must run before scheduler.step(); PyTorch warns
    # otherwise. No gradients exist here, so this step changes no weights.
    optimizer_adam.step()
    scheduler_step.step()  # or scheduler_plateau.step(val_loss)
print(f"\nFinal LR: {scheduler_step.get_last_lr()}")
7. Contoh Lengkap: Neural Network End-to-End
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
# ===== 1. PREPARE DATA =====
np.random.seed(42)
n_samples = 2000

# Binary classification on 2-D Gaussian points: the label is 1 when a point
# lies outside the unit circle (radius > 1.0) and 0 otherwise.
X = np.random.randn(n_samples, 2).astype(np.float32)
r = np.sqrt(X[:, 0]**2 + X[:, 1]**2)
y = (r > 1.0).astype(np.int64)

# First 1600 samples are for training, the remaining 400 for testing
n_train = 1600
X_train_t, X_test_t = torch.from_numpy(X[:n_train]), torch.from_numpy(X[n_train:])
y_train_t, y_test_t = torch.from_numpy(y[:n_train]), torch.from_numpy(y[n_train:])

train_ds = TensorDataset(X_train_t, y_train_t)
test_ds = TensorDataset(X_test_t, y_test_t)
# Only the training loader shuffles
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=64)
# ===== 2. MODEL =====
class BinaryClassifier(nn.Module):
    """Small MLP for 2-feature inputs with two output logits (one per class).

    Architecture: 2 -> 32 -> 16 -> 2 with ReLU between the linear layers.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(2, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 2),  # 2 outputs for 2 classes
        )

    def forward(self, x):
        """Return the output of the sequential stack for ``x``."""
        return self.net(x)
model = BinaryClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ===== 3. TRAINING =====
# Per-epoch history, used later for plotting and checkpointing
train_losses, test_losses = [], []
train_accs, test_accs = [], []
for epoch in range(50):
    # Train
    model.train()
    total_loss, correct, total = 0, 0, 0
    for xb, yb in train_dl:
        optimizer.zero_grad()
        out = model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (out.argmax(1) == yb).sum().item()
        total += yb.size(0)
    train_losses.append(total_loss / len(train_dl))
    train_accs.append(100 * correct / total)

    # Eval: no weight updates and no gradient tracking
    model.eval()
    total_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for xb, yb in test_dl:
            out = model(xb)
            loss = criterion(out, yb)
            total_loss += loss.item()
            correct += (out.argmax(1) == yb).sum().item()
            total += yb.size(0)
    test_losses.append(total_loss / len(test_dl))
    test_accs.append(100 * correct / total)

    # Report every 10th epoch
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1:>3}: Train Loss={train_losses[-1]:.4f}, "
              f"Train Acc={train_accs[-1]:.1f}%, Test Acc={test_accs[-1]:.1f}%")
# ===== 4. VISUALISATION =====
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One row per panel:
# (axis, train curve, test curve, train label, test label, y-axis label, title)
panels = [
    (axes[0], train_losses, test_losses,
     'Train Loss', 'Test Loss', 'Loss', 'Training & Test Loss'),
    (axes[1], train_accs, test_accs,
     'Train Acc', 'Test Acc', 'Accuracy (%)', 'Training & Test Accuracy'),
]
for ax, train_curve, test_curve, train_label, test_label, y_label, title in panels:
    ax.plot(train_curve, label=train_label)
    ax.plot(test_curve, label=test_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
# Save before show() so the written file contains the rendered figure
plt.savefig('pytorch_training_curves.png', dpi=150)
plt.show()
# ===== 5. SAVE & LOAD MODEL =====
# Save a checkpoint: state dicts plus the loss history, not the model object
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'train_losses': train_losses,
    'test_losses': test_losses,
}, 'binary_classifier.pth')
print("\nModel saved!")

# Load the checkpoint.
# map_location='cpu' lets the file load on machines without a GPU.
# weights_only=True restricts unpickling to tensors and plain containers,
# so a tampered checkpoint cannot execute arbitrary code.
checkpoint = torch.load('binary_classifier.pth', map_location='cpu', weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
# Restore the optimizer state too, so training can resume from the checkpoint.
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
print("Model loaded!")
- Selalu mulai dengan learning rate kecil (0.001 untuk Adam, 0.01 untuk SGD)
- Gunakan learning rate scheduler untuk mengurangi lr saat training berlangsung
- Simpan checkpoint secara berkala — gunakan torch.save() pada state_dict, bukan model utuh
- Gunakan torch.no_grad() saat evaluasi untuk menghemat memori
- Cek loss curve — train loss turun tapi test loss naik = overfitting
- Mulai dari arsitektur sederhana, tambah kompleksitas secara bertahap
8. Quiz: Uji Pemahamanmu!
Setelah membaca tutorial di atas, jawablah 5 pertanyaan berikut untuk menguji pemahamanmu tentang PyTorch: