Modern Web Scraping with Python and Playwright in 2026
Selenium is showing its age. Playwright is a modern scraping tool that is faster, more stable, and handles JavaScript-heavy sites with ease. This is a complete guide, from setup to production.
Muhamad Putra Aulia Hidayat
Web scraping in 2026 is more complex than it used to be: almost every website relies on JavaScript, and many run anti-bot protection. Playwright is the answer to these challenges.
Why Playwright Instead of Selenium?
| Feature | Playwright | Selenium |
|---|---|---|
| Speed | Often 2-3x faster | Slower |
| Stability | Very stable | Frequently flaky |
| Async support | Native | Needs wrappers |
| Auto-wait | Yes, automatic | Manual waits |
| Multiple browsers | Chromium, Firefox, WebKit | Chrome, Firefox, Safari, Edge |
| Screenshot/PDF | Built-in | Screenshots built-in; PDF needs workarounds |
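In practice the auto-wait row is the one you feel most in day-to-day code: every action waits for its element to be ready, so almost none of Selenium's explicit-wait boilerplate carries over. A minimal sketch (example.com as a stand-in target) that also shows the built-in screenshot:

```python
import asyncio
from playwright.async_api import async_playwright

async def main():
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()
        await page.goto("https://example.com")
        # Auto-wait: click() waits until the link is attached, visible,
        # stable, and enabled; no explicit WebDriverWait equivalent needed
        await page.click("text=More information")
        # Screenshots are built in, no plugin required
        await page.screenshot(path="after_click.png")
        await browser.close()

asyncio.run(main())
```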
Setup
```bash
uv init scraper
uv add playwright
uv run playwright install chromium
```
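To confirm the install worked before writing any real code, a one-off smoke test like this should print the Chromium version:

```bash
uv run python -c "from playwright.sync_api import sync_playwright; p = sync_playwright().start(); b = p.chromium.launch(); print(b.version); b.close(); p.stop()"
```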
Basic Scraping
```python
import asyncio
from playwright.async_api import async_playwright

async def scrape_tokopedia(keyword: str) -> list[dict]:
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,  # True for production
            args=["--no-sandbox"],
        )
        page = await browser.new_page()

        # Block images and fonts to speed things up
        await page.route(
            "**/*.{png,jpg,jpeg,gif,webp,svg,woff,woff2}",
            lambda route: route.abort(),
        )

        await page.goto(f"https://www.tokopedia.com/search?st=product&q={keyword}")

        # Wait until the product cards have loaded
        await page.wait_for_selector("[data-testid=master-product-card]", timeout=10000)

        products = await page.evaluate("""
            () => {
                const cards = document.querySelectorAll("[data-testid=master-product-card]")
                return Array.from(cards).map(card => ({
                    name: card.querySelector("[data-testid=linkProductName]")?.innerText,
                    price: card.querySelector("[data-testid=linkProductPrice]")?.innerText,
                    shop: card.querySelector("[data-testid=linkProductShopName]")?.innerText,
                    rating: card.querySelector("[data-testid=lblRating]")?.innerText,
                }))
            }
        """)

        await browser.close()
        return products

result = asyncio.run(scrape_tokopedia("laptop gaming"))
print(f"Found {len(result)} products")
```
Handling Anti-Bot Protection
```python
import random

from playwright.async_api import async_playwright

async def get_stealth_browser():
    p = await async_playwright().start()
    browser = await p.chromium.launch(
        headless=True,
        args=[
            "--no-sandbox",
            "--disable-blink-features=AutomationControlled",
            "--disable-infobars",
        ],
    )
    context = await browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent=(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36"
        ),
        locale="id-ID",
        timezone_id="Asia/Jakarta",
    )
    # Inject a script to hide the automation flag
    await context.add_init_script("""
        Object.defineProperty(navigator, 'webdriver', { get: () => undefined })
    """)
    return browser, context

async def human_scroll(page, scrolls: int = 3):
    """Scroll like a human would"""
    for _ in range(scrolls):
        await page.mouse.wheel(0, random.randint(300, 700))
        await page.wait_for_timeout(random.randint(500, 1500))
```
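Putting the two helpers together, a usage sketch (the URL is a placeholder, and note the browser needs explicit closing since get_stealth_browser starts Playwright manually):

```python
async def scrape_stealthily(url: str) -> str:
    browser, context = await get_stealth_browser()
    try:
        page = await context.new_page()
        await page.goto(url, wait_until="domcontentloaded")
        # Scroll a few times so lazy-loaded content gets rendered
        await human_scroll(page, scrolls=5)
        return await page.content()
    finally:
        await browser.close()

html = asyncio.run(scrape_stealthily("https://example.com"))
```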
Scraping with a Login
```python
from pathlib import Path

from playwright.async_api import async_playwright

async def scrape_with_auth(email: str, password: str):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)

        # Reuse the saved session so we don't have to log in every run
        context = await browser.new_context(
            storage_state="auth_state.json" if Path("auth_state.json").exists() else None
        )
        page = await context.new_page()
        await page.goto("https://example.com/login")

        # Check whether we are already logged in
        if await page.locator(".dashboard").count() == 0:
            await page.fill("#email", email)
            await page.fill("#password", password)
            await page.click("[type=submit]")
            await page.wait_for_url("**/dashboard")
            # Save the session for the next run
            await context.storage_state(path="auth_state.json")

        # Continue scraping...
        data = await page.evaluate("() => window.__PRELOADED_STATE__")
        await browser.close()
        return data
```
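Once auth_state.json exists, later runs can load it straight into a fresh context and skip the login form entirely; a sketch:

```python
async def scrape_logged_in(url: str):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        # Cookies and localStorage from the saved session are restored here
        context = await browser.new_context(storage_state="auth_state.json")
        page = await context.new_page()
        await page.goto(url)
        data = await page.evaluate("() => window.__PRELOADED_STATE__")
        await browser.close()
        return data
```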
Concurrent Scraping
```python
async def scrape_multiple_pages(urls: list[str]) -> list[dict]:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        # Allow at most 5 pages open at the same time
        semaphore = asyncio.Semaphore(5)

        async def scrape_one(url: str):
            async with semaphore:
                page = await browser.new_page()
                try:
                    await page.goto(url, timeout=30000)
                    # extract_data is your own per-page parsing helper
                    data = await extract_data(page)
                    return data
                finally:
                    await page.close()

        results = await asyncio.gather(*[scrape_one(url) for url in urls])
        await browser.close()
        return results
```
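In production some URLs will time out, and by default a single raised exception inside asyncio.gather cancels the rest of the batch. A sketch of a drop-in replacement for scrape_one with simple retries:

```python
async def scrape_one_with_retry(browser, semaphore, url: str, attempts: int = 3):
    async with semaphore:
        for attempt in range(attempts):
            page = await browser.new_page()
            try:
                await page.goto(url, timeout=30000)
                return await extract_data(page)
            except Exception:
                if attempt == attempts - 1:
                    raise  # out of attempts, surface the failure
                await asyncio.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, ...
            finally:
                await page.close()
```

Pair it with `asyncio.gather(..., return_exceptions=True)` so failed URLs come back as exception objects you can filter out, instead of aborting the whole batch.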
Legality and Ethics of Scraping
Before scraping, check:
- robots.txt: look at yourtarget.com/robots.txt
- Terms of Service: search for the words "scraping", "crawling", and "automated"
- Rate limits: don't hammer the server; add a delay between requests (see the sketch below)
- Sensitive data: never scrape personal data without consent
Scraping for market research, price monitoring, and business intelligence is generally acceptable, as long as you keep it moderate.
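The robots.txt and rate-limit points are easy to automate. A minimal sketch using only the standard library (the helper names are mine, not from any scraping framework):

```python
import asyncio
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser

def is_allowed(url: str, user_agent: str = "*") -> bool:
    """Check the site's robots.txt before fetching a URL."""
    parsed = urlparse(url)
    rp = RobotFileParser()
    rp.set_url(f"{parsed.scheme}://{parsed.netloc}/robots.txt")
    rp.read()
    return rp.can_fetch(user_agent, url)

async def polite_goto(page, url: str, delay_s: float = 2.0):
    """Add a fixed delay before each navigation so we don't hammer the server."""
    await asyncio.sleep(delay_s)
    await page.goto(url)
```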
Need a custom scraping script for your business? Contact us.