1. Etika & Legalitas Web Scraping
Sebelum mulai menulis kode scraping, sangat penting untuk memahami etika dan legalitas dari aktivitas ini. Web scraping bukan kegiatan yang secara otomatis ilegal, tetapi ada batasan yang harus dipatuhi agar tidak melanggar hukum atau merugikan pihak lain.
Aturan Emas Web Scraping
| Aturan | Penjelasan | Prioritas |
|---|---|---|
| Cek robots.txt | File /robots.txt di setiap website menunjukkan halaman mana yang boleh/tidak boleh di-crawl | 🔴 Wajib |
| Baca Terms of Service | Beberapa situs melarang scraping di ToS mereka | 🔴 Wajib |
| Rate limiting | Jangan kirim terlalu banyak request dalam waktu singkat — bisa membebani server | 🔴 Wajib |
| Hormati data pribadi | Jangan scrape data pribadi (email, nomor HP) tanpa izin | 🔴 Wajib |
| Gunakan API jika tersedia | API resmi selalu lebih baik daripada scraping | 🟡 Disarankan |
| Identifikasi diri | Set User-Agent yang jelas agar admin tahu siapa yang crawl | 🟡 Disarankan |
Di Indonesia, akses tidak sah ke sistem elektronik dapat dijerat dengan UU ITE Pasal 30. Pastikan Anda selalu memiliki izin untuk mengakses dan mengambil data. Scraping data publik untuk keperluan riset atau analisis umumnya diperbolehkan, tetapi scraping untuk meniru produk atau merugikan bisnis lain bisa berujung masalah hukum.
Cek robots.txt
"""
Cek robots.txt sebelum scraping
BeebaneLabs - https://beebanelabs.pages.dev
"""
import requests
from urllib.parse import urljoin
def cek_robots_txt(base_url):
    """Fetch and print the robots.txt of a website.

    The robots.txt URL is built by joining ``/robots.txt`` onto ``base_url``.
    The outcome is reported on stdout only; nothing is returned.

    Args:
        base_url: Any URL on the target site, e.g. ``"https://example.com"``.

    Status handling:
        * 200      -> the file content is printed.
        * 4xx      -> the file is treated as absent (no specific restrictions).
        * anything else (e.g. 5xx) -> the rules are unknown, so the caller is
          told to hold off instead of assuming everything is allowed.
    """
    robots_url = urljoin(base_url, "/robots.txt")
    print(f"Mengecek: {robots_url}\n")

    # Keep the try body minimal: only the network call can raise here.
    try:
        response = requests.get(robots_url, timeout=10)
    except requests.RequestException as e:
        print(f"Error mengakses robots.txt: {e}")
        return

    status = response.status_code
    if status == 200:
        print(response.text)
    elif 400 <= status < 500:
        print(f"robots.txt tidak ditemukan (status: {status})")
        print("Artinya: Tidak ada pembatasan khusus (tapi tetap gunakan etika)")
    else:
        # A server error does not mean "no rules"; the rules simply could not
        # be read, so the safe choice is to not crawl for now.
        print(f"robots.txt tidak dapat diakses (status: {status})")
        print("Artinya: Aturan tidak diketahui, tunda scraping dan coba lagi nanti")
# Contoh penggunaan
# cek_robots_txt("https://example.com")
2. BeautifulSoup Basics
BeautifulSoup (bs4) adalah library Python paling populer untuk parsing HTML dan XML. Ia mengubah dokumen HTML mentah menjadi struktur data (tree) yang bisa dijelajahi dengan mudah.
Instalasi
# Instalasi BeautifulSoup dan requests
pip install beautifulsoup4 requests
# Untuk parser yang lebih cepat (opsional)
pip install lxml
2.1 — Parsing HTML Pertama
"""
BeautifulSoup Basics — Parsing HTML
BeebaneLabs - https://beebanelabs.pages.dev
"""
from bs4 import BeautifulSoup
# Raw sample HTML used by every example in this snippet
html_mentah = """
<html>
<head><title>BeebaneLabs Tutorial</title></head>
<body>
<div class="article">
<h1 id="judul">Belajar Python</h1>
<p class="deskripsi">Tutorial Python untuk pemula</p>
<ul class="topik">
<li>Variabel & Tipe Data</li>
<li>Control Flow</li>
<li>Function</li>
<li>Class & OOP</li>
</ul>
<a href="/artikel/python-lanjutan" class="link">Pelajari Lanjutan</a>
<a href="/artikel/python-data" class="link">Python Data Science</a>
</div>
</body>
</html>
"""

# Parse the HTML into a navigable tree
soup = BeautifulSoup(html_mentah, "html.parser")

# === Accessing elements directly ===
print("=== Mengakses Elemen ===")
print(f"Title: {soup.title.text}")  # BeebaneLabs Tutorial (the <title> text)
print(f"H1: {soup.h1.text}")  # Belajar Python
print(f"Judul by ID: {soup.find(id='judul').text}")  # Belajar Python

# === find() - returns the FIRST matching element ===
print("\n=== find() ===")
paragraf = soup.find("p", class_="deskripsi")
print(f"Paragraf: {paragraf.text}")
link = soup.find("a", class_="link")
print(f"Link pertama: {link.text} → {link['href']}")

# === find_all() - returns ALL matching elements ===
print("\n=== find_all() ===")
semua_li = soup.find_all("li")
for i, li in enumerate(semua_li, 1):
    print(f" {i}. {li.text}")
semua_link = soup.find_all("a", class_="link")
for a in semua_link:
    print(f" Link: {a.text} → {a['href']}")

# === CSS selectors ===
print("\n=== CSS Selector ===")
judul = soup.select_one("div.article h1")
print(f"Select H1: {judul.text}")
topik = soup.select("ul.topik li")
for t in topik:
    print(f" Topik: {t.text}")
2.2 — Ringkasan Method BeautifulSoup
| Method | Fungsi | Contoh |
|---|---|---|
| find(tag, attrs) | Mencari SATU elemen pertama yang cocok | soup.find("div", class_="card") |
| find_all(tag, attrs) | Mencari SEMUA elemen yang cocok (return list) | soup.find_all("a") |
| select_one(css) | CSS selector — satu elemen | soup.select_one("div.card h2") |
| select(css) | CSS selector — semua elemen (return list) | soup.select("ul li a") |
| .text / .get_text() | Mengambil teks dari elemen | elem.text.strip() |
| ["attr"] / .get("attr") | Mengambil atribut (href, class, src) | link["href"] |
3. Parsing HTML dengan BeautifulSoup
Sekarang kita akan mengambil HTML dari website sungguhan menggunakan requests dan mem-parsing-nya dengan BeautifulSoup.
"""
Web Scraping — Mengambil Quotes dari Website
BeebaneLabs - https://beebanelabs.pages.dev
Target: http://quotes.toscrape.com (situs latihan legal)
"""
import requests
from bs4 import BeautifulSoup
def ambil_quotes(url):
    """Download one page and return every quote found on it.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of dicts with keys ``"teks"`` (quote text), ``"penulis"``
        (author name) and ``"tags"`` (list of tag strings). An empty list is
        returned, after printing the status, when the response is not 200.
    """
    response = requests.get(
        url,
        headers={"User-Agent": "BeebaneLabs-Scraper/1.0 (tutorial)"},
        timeout=10,
    )

    # Guard clause: anything other than 200 yields no quotes.
    if response.status_code != 200:
        print(f"Error: Status {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    # One dict per <div class="quote"> container, in document order.
    return [
        {
            "teks": wadah.find("span", class_="text").get_text(),
            "penulis": wadah.find("small", class_="author").get_text(),
            "tags": [
                tautan.get_text()
                for tautan in wadah.find_all("a", class_="tag")
            ],
        }
        for wadah in soup.find_all("div", class_="quote")
    ]
# Scrape the first page and print a short report for each quote
url = "http://quotes.toscrape.com"
print(f"Scraping: {url}\n")
quotes = ambil_quotes(url)
for i, q in enumerate(quotes, start=1):
    # One print call per quote: text preview (first 80 chars), author, tags,
    # then an empty line as separator.
    print(
        f'{i}. "{q["teks"][:80]}..."',
        f" — {q['penulis']}",
        f" Tags: {', '.join(q['tags'])}",
        "",
        sep="\n",
    )
print(f"Total: {len(quotes)} quotes ditemukan")
Website quotes.toscrape.com adalah situs latihan yang sengaja dibuat untuk belajar web scraping. Gunakan situs ini untuk berlatih tanpa khawatir melanggar aturan. Situs serupa: books.toscrape.com dan httpbin.org.
4. Mengekstrak Data dari Halaman Web
Teknik ekstraksi data yang lebih lanjut — menangani tabel HTML, atribut gambar, dan nested elements.
"""
Teknik Ekstraksi Data Lanjutan
BeebaneLabs - https://beebanelabs.pages.dev
"""
import requests
from bs4 import BeautifulSoup
def scrape_books(url):
    """Scrape the book listing found at ``url`` (books.toscrape.com layout).

    Args:
        url: Address of a listing page containing
            ``<article class="product_pod">`` elements.

    Returns:
        A list of dicts with keys ``"judul"`` (title), ``"harga"`` (price
        text), ``"rating"`` (int 1-5, or 0 when unknown), ``"ketersediaan"``
        (availability text) and ``"link"`` (href exactly as it appears in the
        page, which may be relative). An empty list is returned, after
        printing the status, when the response is not 200.
    """
    headers = {"User-Agent": "BeebaneLabs-Scraper/1.0 (tutorial)"}
    response = requests.get(url, headers=headers, timeout=10)

    # Same status handling as ambil_quotes: do not parse error pages.
    if response.status_code != 200:
        print(f"Error: Status {response.status_code}")
        return []

    # Parse the raw bytes so BeautifulSoup can pick the encoding from the
    # document itself; response.text relies on the HTTP header and can
    # mis-decode symbols such as "£" when the header has no charset.
    soup = BeautifulSoup(response.content, "html.parser")

    # Word used in the CSS class -> numeric rating (built once, not per book).
    rating_map = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}

    books = []
    for article in soup.find_all("article", class_="product_pod"):
        # The full title lives in the "title" attribute of the <a> in <h3>.
        judul_elem = article.find("h3").find("a")

        # The rating is encoded as a class name, e.g. class="star-rating Three".
        # Look for whichever class is a known rating word instead of relying
        # on its position in the class list.
        rating_p = article.find("p", class_="star-rating")
        rating = next(
            (rating_map[kelas] for kelas in rating_p["class"] if kelas in rating_map),
            0,
        )

        books.append({
            "judul": judul_elem["title"],
            "harga": article.find("p", class_="price_color").get_text(),
            "rating": rating,
            "ketersediaan": article.find("p", class_="instock").get_text().strip(),
            "link": judul_elem["href"],
        })
    return books
# Scrape the book listing and show the first ten results
url = "http://books.toscrape.com"
print(f"Scraping: {url}\n")
books = scrape_books(url)
for i, book in enumerate(books[:10], start=1):
    # One star emoji per rating point.
    bintang = book["rating"] * "⭐"
    print(
        f"{i}. {book['judul'][:50]}",
        f" Harga: {book['harga']} | Rating: {bintang} | {book['ketersediaan']}",
        "",
        sep="\n",
    )
print(f"Total di halaman: {len(books)} buku")
# === Ekstraksi Tabel HTML ===
def ekstrak_tabel(html_string):
    """Extract the first HTML table in ``html_string`` as a list of row dicts.

    Column names come from the ``<th>`` cells (inside ``<thead>`` when
    present, otherwise anywhere in the table). Each data row becomes a dict
    mapping column name to the stripped cell text; cells beyond the number of
    headers are ignored.

    Args:
        html_string: HTML markup that may contain a ``<table>``.

    Returns:
        A list of dicts, one per data row. An empty list when the markup
        contains no ``<table>``.
    """
    soup = BeautifulSoup(html_string, "html.parser")
    tabel = soup.find("table")
    if tabel is None:
        return []

    # Header cells: prefer <thead>, fall back to the whole table.
    thead = tabel.find("thead")
    sumber_header = tabel if thead is None else thead
    headers = [th.get_text().strip() for th in sumber_header.find_all("th")]

    # Data rows: prefer <tbody>; without it, take every row that has <td>
    # cells so that a header-only row is not reported as an empty record.
    tbody = tabel.find("tbody")
    if tbody is not None:
        baris_data = tbody.find_all("tr")
    else:
        baris_data = [tr for tr in tabel.find_all("tr") if tr.find("td")]

    rows = []
    for tr in baris_data:
        nilai = (td.get_text().strip() for td in tr.find_all("td"))
        # zip stops at the shorter side, so extra cells are dropped.
        rows.append(dict(zip(headers, nilai)))
    return rows
# Sample HTML table: three columns, two data rows
html_tabel = """
<table>
<thead><tr><th>Sensor</th><th>Lokasi</th><th>Status</th></tr></thead>
<tbody>
<tr><td>DHT22</td><td>Ruang Server</td><td>Aktif</td></tr>
<tr><td>MQ-135</td><td>Dapur</td><td>Maintenance</td></tr>
</tbody>
</table>
"""

# Run the extractor on the sample and print one dict per table row
print("\n=== Hasil Ekstraksi Tabel ===")
data_tabel = ekstrak_tabel(html_tabel)
for row in data_tabel:
    print(row)
5. Menangani Pagination
Kebanyakan website menampilkan data dalam beberapa halaman. Kita perlu menavigasi dari halaman ke halaman untuk mengambil semua data.
"""
Scraping dengan Pagination
BeebaneLabs - https://beebanelabs.pages.dev
"""
import requests
from bs4 import BeautifulSoup
import time
def scrape_semua_halaman(base_url, max_halaman=5):
    """Scrape several listing pages by following the "next" link.

    Starting at ``base_url``, each page's books are collected, then the
    ``<li class="next">`` link is followed until there is no such link, a
    non-200 response is received, or ``max_halaman`` pages have been read.

    Args:
        base_url: URL of the first page to scrape.
        max_halaman: Maximum number of pages to fetch.

    Returns:
        A list of dicts with keys ``"judul"`` (title) and ``"harga"`` (price
        text), in the order encountered.
    """
    from urllib.parse import urljoin  # local import: only this function needs it

    # Identical for every request, so build it once.
    headers = {"User-Agent": "BeebaneLabs-Scraper/1.0 (tutorial)"}

    semua_buku = []
    url = base_url
    for halaman in range(1, max_halaman + 1):
        print(f"Scraping halaman {halaman}...")
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            print(f" Error: Status {response.status_code}")
            break

        # Parse the raw bytes so the parser can take the encoding from the
        # document; response.text can mis-decode "£" without a header charset.
        soup = BeautifulSoup(response.content, "html.parser")

        # Collect the books on this page.
        daftar_artikel = soup.find_all("article", class_="product_pod")
        for article in daftar_artikel:
            judul = article.find("h3").find("a")["title"]
            harga = article.find("p", class_="price_color").get_text()
            semua_buku.append({"judul": judul, "harga": harga})
        # Report the number of books actually collected from this page.
        print(f" Ditemukan: {len(daftar_artikel)} buku")

        # Find the "next" link; its absence means this was the last page.
        next_btn = soup.find("li", class_="next")
        if next_btn is None:
            print(" Tidak ada halaman berikutnya")
            break

        # urljoin resolves relative hrefs against the current page and
        # returns absolute hrefs unchanged, so url always advances.
        url = urljoin(url, next_btn.find("a")["href"])

        # Polite delay between requests; pointless after the final page.
        if halaman < max_halaman:
            time.sleep(1)
    return semua_buku
# Scrape the first three pages, starting from page 1 of the catalogue
base_url = "http://books.toscrape.com/catalogue/page-1.html"
print(f"Scraping dari: {base_url}\n")
buku = scrape_semua_halaman(base_url, max_halaman=3)
print(f"\nTotal buku terkumpul: {len(buku)}")

# Show the first ten books: title (cut to 50 characters) and price
for i, b in enumerate(buku[:10], start=1):
    print(" {}. {} — {}".format(i, b["judul"][:50], b["harga"]))
Teknik Pagination Umum
| Tipe Pagination | URL Pattern | Strategi |
|---|---|---|
| Query parameter | ?page=1, ?page=2 | Loop naikkan angka page |
| Path-based | /page/1/, /page/2/ | Loop ganti angka di URL |
| Next button | Link "Next" atau "Berikutnya" | Ikuti link next sampai habis |
| Infinite scroll | Tidak ada pagination tradisional | Gunakan Selenium atau API internal |
| Load more button | Tombol "Muat Lebih" | Gunakan Selenium untuk klik |
6. Selenium untuk Situs Dinamis
BeautifulSoup hanya bisa mem-parsing HTML yang sudah ada di response. Jika website menggunakan JavaScript untuk memuat data secara dinamis (AJAX, React, Vue, dll), kita perlu Selenium — library yang mengendalikan browser sungguhan.
Kapan Menggunakan Selenium?
| Situasi | Pakai Apa? |
|---|---|
| HTML langsung dari server | ✅ BeautifulSoup + requests |
| Data dimuat via JavaScript/AJAX | ✅ Selenium |
| Perlu login / interaksi form | ✅ Selenium |
| Infinite scroll | ✅ Selenium |
| Ada CAPTCHA | ⚠️ Sulit — pertimbangkan alternatif |
| Data di API endpoint | ✅ requests langsung ke API |
"""
Web Scraping dengan Selenium
BeebaneLabs - https://beebanelabs.pages.dev
Instalasi:
pip install selenium
# Juga perlu WebDriver (Chrome/Firefox)
# Atau gunakan webdriver-manager:
pip install webdriver-manager
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import time
def setup_driver(headless=True):
    """Create and configure a Chrome WebDriver.

    Args:
        headless: When true, Chrome is started with ``--headless`` (no GUI).

    Returns:
        A ``webdriver.Chrome`` instance with a 10-second implicit wait set.
    """
    # Collect the command-line switches first, then apply them in order.
    argumen = ["--headless"] if headless else []
    argumen += [
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1920,1080",
        # User-Agent string that resembles a regular desktop browser.
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "Chrome/120.0.0.0 Safari/537.36",
    ]

    options = Options()
    for saklar in argumen:
        options.add_argument(saklar)

    # Alternative: let webdriver-manager download a matching driver:
    #   from webdriver_manager.chrome import ChromeDriverManager
    #   service = Service(ChromeDriverManager().install())
    #   driver = webdriver.Chrome(service=service, options=options)
    driver = webdriver.Chrome(options=options)

    # Element lookups wait up to 10 seconds before failing.
    # NOTE(review): other functions in this file also use WebDriverWait on
    # drivers created here; confirm that combining both wait styles is intended.
    driver.implicitly_wait(10)
    return driver
def scrape_dengan_selenium(url):
    """Scrape quotes from a page whose content is rendered by JavaScript.

    A headless driver opens ``url``, waits (up to 15 seconds) until an element
    with class ``quote`` is present, then the rendered page source is parsed
    with BeautifulSoup. The driver is always closed afterwards.

    Args:
        url: Address of the page to open.

    Returns:
        A list of dicts with keys ``"teks"`` (quote text) and ``"penulis"``
        (author name).
    """
    driver = setup_driver(headless=True)
    try:
        print(f"Membuka: {url}")
        driver.get(url)

        # Block until at least one quote element exists in the DOM.
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "quote"))
        )

        # Snapshot the rendered HTML and parse it outside the browser.
        html_render = driver.page_source
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html_render, "html.parser")

        return [
            {
                "teks": wadah.find("span", class_="text").get_text(),
                "penulis": wadah.find("small", class_="author").get_text(),
            }
            for wadah in soup.find_all("div", class_="quote")
        ]
    finally:
        driver.quit()
def scroll_infinite(url, max_scrolls=5):
    """Load a page and scroll to the bottom a fixed number of times.

    Args:
        url: Address of the page to open.
        max_scrolls: How many times to scroll to the bottom of the document.

    Returns:
        The page source (HTML string) after all scrolls have been performed.
    """
    driver = setup_driver(headless=True)
    try:
        driver.get(url)
        time.sleep(2)  # pause after the initial page load

        for urutan in range(1, max_scrolls + 1):
            # Jump to the current bottom of the document.
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )
            print(f"Scroll #{urutan}...")
            time.sleep(2)  # give newly requested content time to load

        # Everything loaded so far is part of the page source now.
        return driver.page_source
    finally:
        driver.quit()
def login_dan_scrape(url_login, url_target, credentials):
    """Log in through a web form, then return the source of a target page.

    The login page is expected to contain inputs with ids ``username`` and
    ``password`` and a ``button[type='submit']``.

    Args:
        url_login: Address of the login page.
        url_target: Address of the page to fetch after logging in.
        credentials: Mapping with keys ``"username"`` and ``"password"``.

    Returns:
        The page source (HTML string) of ``url_target``.
    """
    driver = setup_driver(headless=True)
    try:
        driver.get(url_login)

        # Locate both inputs first, then type into them in the same order.
        kolom_form = {
            nama: driver.find_element(By.ID, nama)
            for nama in ("username", "password")
        }
        for nama, kolom in kolom_form.items():
            kolom.send_keys(credentials[nama])

        # Submit the form.
        driver.find_element(By.CSS_SELECTOR, "button[type='submit']").click()

        # Fixed pause while the post-login redirect happens.
        time.sleep(3)

        # Open the protected page and hand back its HTML.
        driver.get(url_target)
        return driver.page_source
    finally:
        driver.quit()
# Contoh penggunaan (uncomment untuk menjalankan):
# quotes = scrape_dengan_selenium("http://quotes.toscrape.com/js/")
# for q in quotes:
# print(f'"{q["teks"]}" — {q["penulis"]}')
Selenium membutuhkan browser terinstal di sistem. Pastikan Chrome/Firefox sudah terpasang. Untuk deployment di server, gunakan mode headless=True dan pertimbangkan menggunakan Playwright sebagai alternatif modern yang lebih cepat.
7. Rate Limiting & Etika Teknis
Rate limiting adalah keharusan dalam web scraping. Mengirim terlalu banyak request dalam waktu singkat bisa mengakibatkan IP Anda diblokir atau bahkan membebani server target.
"""
Rate Limiter untuk Web Scraping
BeebaneLabs - https://beebanelabs.pages.dev
"""
import time
import random
import requests
from functools import wraps
class RateLimiter:
    """Rate limiter that enforces a randomized minimum gap between requests.

    Args:
        min_delay: Lower bound, in seconds, of the gap between two requests.
        max_delay: Upper bound, in seconds, of the gap between two requests.
    """

    def __init__(self, min_delay=1.0, max_delay=3.0):
        self.min_delay = min_delay
        self.max_delay = max_delay
        # Monotonic-clock reading taken at the end of the last wait().
        # -inf means "no request yet", so the first wait() never sleeps.
        self.last_request_time = float("-inf")

    def wait(self):
        """Sleep just long enough to respect the delay, then record the time.

        A fresh delay is drawn uniformly from ``[min_delay, max_delay]`` on
        every call; only the part of it that has not already elapsed since
        the previous call is slept.
        """
        delay = random.uniform(self.min_delay, self.max_delay)
        # The monotonic clock is immune to system clock adjustments, which
        # would otherwise shorten or lengthen the measured gap.
        elapsed = time.monotonic() - self.last_request_time
        sleep_time = delay - elapsed
        if sleep_time > 0:
            print(f" [Rate Limit] Menunggu {sleep_time:.1f}s...")
            time.sleep(sleep_time)
        self.last_request_time = time.monotonic()
def with_retry(max_retries=3, backoff_factor=2):
    """Decorator factory: retry on ``requests.RequestException`` with backoff.

    The wrapped function is called up to ``max_retries`` times. After a failed
    attempt number ``k`` (counting from 0) the wrapper sleeps
    ``backoff_factor ** k`` seconds before trying again. When the final
    attempt fails, the exception is re-raised to the caller. Exceptions of
    other types are never retried.

    Args:
        max_retries: Total number of attempts. Values below 1 are treated as
            1 so the wrapped function is always called at least once.
        backoff_factor: Base of the exponential wait between attempts.

    Returns:
        A decorator that wraps a function with the retry behaviour.
    """
    # Guarantee one attempt; otherwise the loop body would never run and the
    # wrapper would return None without calling the function.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except requests.RequestException as e:
                    if attempt == attempts - 1:
                        print(f" [Retry] Semua percobaan gagal: {e}")
                        raise
                    wait_time = backoff_factor ** attempt
                    print(f" [Retry] Percobaan {attempt + 1} gagal: {e}")
                    print(f" [Retry] Menunggu {wait_time}s sebelum retry...")
                    time.sleep(wait_time)
        return wrapper
    return decorator
class EticalScraper:
    """Scraper with built-in politeness: rate limiting, retry, clear identity.

    Args:
        base_url: Root URL of the target site (used to locate robots.txt).
        min_delay: Lower bound, in seconds, of the gap between requests.
        max_delay: Upper bound, in seconds, of the gap between requests.
    """

    def __init__(self, base_url, min_delay=2, max_delay=5):
        self.base_url = base_url
        self.limiter = RateLimiter(min_delay, max_delay)
        # One session for all requests so connections and headers are reused.
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "BeebaneLabs-Scraper/1.0 (tutorial; +https://beebanelabs.pages.dev)"
        })
        self.request_count = 0

    @staticmethod
    def _parse_retry_after(value, default=60):
        """Convert a Retry-After header value into a number of seconds.

        The header may carry either a number of seconds or an HTTP date.

        Args:
            value: Raw header value, or None when the header is absent.
            default: Seconds to use when the value is missing or unparsable.

        Returns:
            A non-negative int number of seconds to wait.
        """
        from datetime import datetime, timezone
        from email.utils import parsedate_to_datetime

        if value is None:
            return default
        try:
            return max(0, int(value))
        except (TypeError, ValueError):
            pass  # not a plain number; try the date form next
        try:
            target = parsedate_to_datetime(value)
        except (TypeError, ValueError):
            return default
        if target.tzinfo is None:
            # Treat a date without zone information as UTC.
            target = target.replace(tzinfo=timezone.utc)
        remaining = (target - datetime.now(timezone.utc)).total_seconds()
        return max(0, int(remaining))

    @with_retry(max_retries=3)
    def get(self, url):
        """GET ``url`` with rate limiting; retried via ``with_retry``.

        Args:
            url: Address to request.

        Returns:
            The ``requests.Response`` for a successful request.

        Raises:
            requests.RequestException: On a 429 response (after waiting the
                server-requested time), on other HTTP error statuses, or on
                network failures, once the retries are exhausted.
        """
        self.limiter.wait()
        self.request_count += 1
        print(f" [Request #{self.request_count}] {url}")
        response = self.session.get(url, timeout=15)

        # The server explicitly asked us to slow down.
        if response.status_code == 429:
            retry_after = self._parse_retry_after(
                response.headers.get("Retry-After")
            )
            print(f" [429 Too Many Requests] Menunggu {retry_after}s...")
            time.sleep(retry_after)
            # Raising hands control to with_retry, which tries again.
            raise requests.RequestException("Rate limited by server")

        response.raise_for_status()
        return response

    def cek_robots_txt(self):
        """Print the first 500 characters of the site's robots.txt.

        Reports on stdout when the file cannot be fetched or the response
        status is not 200. Returns nothing.
        """
        from urllib.parse import urljoin

        robots_url = urljoin(self.base_url, "/robots.txt")
        try:
            resp = self.session.get(robots_url, timeout=10)
        except requests.RequestException:
            print("Tidak bisa mengakses robots.txt")
            return

        if resp.status_code != 200:
            # Say so explicitly instead of printing nothing at all.
            print(f"robots.txt tidak tersedia (status: {resp.status_code})")
            return

        print("=== robots.txt ===")
        print(resp.text[:500])
        # Show the ellipsis only when the content was actually cut.
        if len(resp.text) > 500:
            print("...")
# --- Penggunaan ---
# scraper = EticalScraper("http://quotes.toscrape.com", min_delay=2, max_delay=4)
# scraper.cek_robots_txt()
#
# for page in range(1, 4):
# url = f"http://quotes.toscrape.com/page/{page}/"
# response = scraper.get(url)
# print(f" Status: {response.status_code}")
Best Practices Rate Limiting
| Praktik | Rekomendasi |
|---|---|
| Delay minimum | 1-3 detik antar request (makin lama makin baik) |
| Randomisasi delay | Gunakan delay acak (misal 2-5s) agar tidak terdeteksi bot |
| Retry with backoff | Jika gagal, tunggu 1s → 2s → 4s (jeda berlipat ganda tiap percobaan) sebelum retry |
| Hormati 429 | Jika server membalas 429, tunggu sesuai header Retry-After |
| Session reuse | Gunakan requests.Session() untuk reuse koneksi |
| User-Agent jelas | Identifikasi scraper Anda agar admin bisa menghubungi |
8. Menyimpan Hasil Scraping
Data yang sudah di-scrape perlu disimpan dalam format yang bisa digunakan. Berikut beberapa opsi penyimpanan yang umum.
"""
Menyimpan Hasil Scraping ke Berbagai Format
BeebaneLabs - https://beebanelabs.pages.dev
"""
import json
import csv
import sqlite3
from datetime import datetime
# === Sample scraping results ===
# Each record has a title ("judul"), a price string ("harga"), an integer
# rating, and a stock status ("stok").
data_scraping = [
    {"judul": "Belajar Python", "harga": "£25.99", "rating": 4, "stok": "Tersedia"},
    {"judul": "Web Scraping 101", "harga": "£19.99", "rating": 5, "stok": "Tersedia"},
    {"judul": "Data Science Guide", "harga": "£32.50", "rating": 3, "stok": "Habis"},
    {"judul": "IoT Programming", "harga": "£28.00", "rating": 4, "stok": "Tersedia"},
]
# === 1. Simpan ke JSON ===
def simpan_json(data, filename, source="books.toscrape.com"):
    """Write scraped data to a JSON file together with a metadata header.

    The file contains an object with two keys: ``"metadata"`` (timestamp of
    the save in ISO format, item count, and source name) and ``"data"`` (the
    records as given). Non-ASCII characters are written as-is in UTF-8.

    Args:
        data: List of JSON-serializable records.
        filename: Path of the file to create or overwrite.
        source: Name of the site the data came from, stored in the metadata.
    """
    output = {
        "metadata": {
            "scraped_at": datetime.now().isoformat(),
            "total_items": len(data),
            "source": source,
        },
        "data": data,
    }
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2, ensure_ascii=False)
    print(f"[JSON] Disimpan ke {filename} ({len(data)} item)")
# === 2. Simpan ke CSV ===
def simpan_csv(data, filename):
    """Write scraped data to a CSV file with a header row.

    The columns are the union of the keys of all records, in order of first
    appearance; a record that lacks a column gets an empty cell there.

    Args:
        data: List of dict records. When empty, nothing is written and no
            file is created.
        filename: Path of the file to create or overwrite.
    """
    if not data:
        return

    # Scraped records are not always uniform, so gather the columns from
    # every record rather than only the first one. dict.fromkeys keeps the
    # first-seen order while dropping duplicates.
    kolom = list(dict.fromkeys(kunci for baris in data for kunci in baris))

    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=kolom)
        writer.writeheader()
        writer.writerows(data)
    print(f"[CSV] Disimpan ke {filename} ({len(data)} baris)")
# === 3. Simpan ke SQLite ===
def simpan_sqlite(data, db_path):
    """Append scraped records to the ``hasil_scraping`` table of a SQLite DB.

    The table is created when it does not exist yet. All rows of one call are
    inserted in a single transaction: either every row is stored or none is.

    Args:
        data: List of dicts, each with keys ``"judul"``, ``"harga"``,
            ``"rating"`` and ``"stok"``.
        db_path: Path of the SQLite database file.

    Raises:
        KeyError: When a record lacks one of the required keys (nothing is
            inserted in that case).
    """
    # Build the parameter tuples up front so a malformed record fails before
    # anything is written.
    baris = [
        (item["judul"], item["harga"], item["rating"], item["stok"])
        for item in data
    ]

    conn = sqlite3.connect(db_path)
    try:
        # "with conn" commits on success and rolls back on an exception.
        with conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS hasil_scraping (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    judul TEXT,
                    harga TEXT,
                    rating INTEGER,
                    stok TEXT,
                    scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
            conn.executemany(
                "INSERT INTO hasil_scraping (judul, harga, rating, stok) VALUES (?, ?, ?, ?)",
                baris,
            )
    finally:
        # Always release the database handle, even when an insert fails.
        conn.close()
    print(f"[SQLite] Disimpan ke {db_path} ({len(data)} baris)")
# === Save the sample data in every supported format ===
print("=== Menyimpan Hasil Scraping ===\n")
for simpan, nama_file in (
    (simpan_json, "hasil_scraping.json"),
    (simpan_csv, "hasil_scraping.csv"),
    (simpan_sqlite, "hasil_scraping.db"),
):
    simpan(data_scraping, nama_file)

# === Read the JSON file back to confirm what was written ===
print("\n=== Membaca dari JSON ===")
with open("hasil_scraping.json", "r", encoding="utf-8") as f:
    loaded = json.load(f)
print(f"Metadata: {loaded['metadata']}")
for item in loaded["data"][:3]:
    print(" {} — {}".format(item["judul"], item["harga"]))
print("\nSelesai! ✅")
Perbandingan Format Penyimpanan
| Format | Keunggulan | Cocok Untuk |
|---|---|---|
| JSON | Fleksibel, mendukung nested data, mudah dibaca | Data semi-struktural, API, konfigurasi |
| CSV | Ringan, bisa dibuka di Excel, mudah diimpor | Data tabular, analisis sederhana |
| SQLite | Query cepat, relasi antar tabel, single file | Data besar, perlu pencarian/filter |
| Excel | Populer di bisnis, mendukung grafik | Laporan, presentasi |
9. Quiz: Uji Pemahamanmu!
Setelah membaca tutorial di atas, jawablah 5 pertanyaan berikut untuk menguji pemahamanmu tentang web scraping dengan Python: