Kami menggunakan cookies untuk meningkatkan pengalaman Anda di website ini. Dengan melanjutkan, Anda menyetujui penggunaan cookies sesuai Kebijakan Privasi kami.
Selenium sudah terlalu tua. Playwright adalah tools scraping modern yang lebih cepat, lebih stabil, dan bisa handle JavaScript-heavy sites. Ini panduan lengkap dari setup sampai production.
Muhamad Putra Aulia Hidayat
Web scraping di 2026 sudah lebih kompleks — hampir semua website pakai JavaScript, banyak yang punya anti-bot protection. Playwright adalah jawaban untuk semua tantangan ini.
| Fitur | Playwright | Selenium |
|---|---|---|
| Speed | 2-3x lebih cepat | Lambat |
| Stability | Sangat stabil | Sering flaky |
| Async support | Native | Butuh wrapper |
| Auto-wait | Ya, otomatis | Manual |
| Multiple browsers | Chromium, Firefox, WebKit | Chromium, Firefox |
| Screenshot/PDF | Built-in | Plugin |
# Create the project, enter its directory, then add Playwright and download Chromium.
uv init scraper
# `uv init scraper` creates the project inside ./scraper, so the following
# commands must run from there to find its pyproject.toml.
cd scraper
uv add playwright
uv run playwright install chromium
import asyncio
from playwright.async_api import async_playwright
async def scrape_tokopedia(keyword: str) -> list[dict]:
    """Search Tokopedia for *keyword* and return the product cards found.

    Launches headless Chromium, opens the product search page, waits for the
    first product card to appear, and extracts the cards' fields in-page.

    Args:
        keyword: Free-text search term. It is URL-encoded before being placed
            in the query string, so spaces and characters such as ``&`` or
            ``#`` cannot corrupt the URL.

    Returns:
        One dict per product card with the keys ``name``, ``price``, ``shop``
        and ``rating``. A field whose element is missing from the card carries
        no text (presumably ``None`` after serialization - confirm).

    Raises:
        playwright.async_api.TimeoutError: If navigation times out or no
            product card appears within 10 seconds.
    """
    from urllib.parse import quote_plus

    search_url = (
        f"https://www.tokopedia.com/search?st=product&q={quote_plus(keyword)}"
    )

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,  # True for production
            args=["--no-sandbox"],
        )
        try:
            page = await browser.new_page()

            # Block images and fonts so pages load faster.
            await page.route(
                "**/*.{png,jpg,jpeg,gif,webp,svg,woff,woff2}",
                lambda route: route.abort(),
            )

            await page.goto(search_url)

            # Wait until the products have loaded.
            await page.wait_for_selector(
                "[data-testid=master-product-card]", timeout=10000
            )

            products = await page.evaluate("""
                () => {
                    const cards = document.querySelectorAll("[data-testid=master-product-card]")
                    return Array.from(cards).map(card => ({
                        name: card.querySelector("[data-testid=linkProductName]")?.innerText,
                        price: card.querySelector("[data-testid=linkProductPrice]")?.innerText,
                        shop: card.querySelector("[data-testid=linkProductShopName]")?.innerText,
                        rating: card.querySelector("[data-testid=lblRating]")?.innerText,
                    }))
                }
            """)
        finally:
            # Close the browser even when navigation or the selector wait fails.
            await browser.close()

    return products
# Example run: scrape the results for one search term and report the count.
result = asyncio.run(
    scrape_tokopedia(keyword="laptop gaming"),
)
print(
    f"Ditemukan {len(result)} produk",
)
from playwright.async_api import async_playwright
import random
async def get_stealth_browser():
    """Start Playwright and return a Chromium browser with a disguised context.

    The context uses a 1920x1080 viewport, a desktop Chrome user agent, the
    ``id-ID`` locale and the ``Asia/Jakarta`` timezone. An init script makes
    ``navigator.webdriver`` read as ``undefined`` on every page it opens.

    Returns:
        A ``(browser, context)`` tuple. The caller is responsible for closing
        the browser.

    Raises:
        Exception: Any error raised while launching the browser or creating
            the context is re-raised after the Playwright driver is stopped.
    """
    # NOTE: the driver is started manually (no ``async with``) and is not part
    # of the return value, so on success it stays alive after this returns.
    p = await async_playwright().start()
    try:
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--disable-infobars",
            ],
        )

        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
            locale="id-ID",
            timezone_id="Asia/Jakarta",
        )

        # Inject a script that hides the automation flag.
        await context.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', { get: () => undefined })
        """)
    except BaseException:
        # Setup failed part-way: stop the driver so neither it nor a
        # half-started browser is left running.
        await p.stop()
        raise

    return browser, context
async def human_scroll(
    page,
    scrolls: int = 3,
    *,
    min_delta: int = 300,
    max_delta: int = 700,
    min_pause_ms: int = 500,
    max_pause_ms: int = 1500,
):
    """Scroll the page down in randomized steps, pausing between steps.

    Args:
        page: Object exposing ``mouse.wheel(dx, dy)`` and
            ``wait_for_timeout(ms)`` as awaitables (a Playwright page).
        scrolls: Number of scroll steps; zero or a negative value does nothing.
        min_delta: Smallest vertical wheel distance per step, inclusive.
        max_delta: Largest vertical wheel distance per step, inclusive.
        min_pause_ms: Shortest pause after a step in milliseconds, inclusive.
        max_pause_ms: Longest pause after a step in milliseconds, inclusive.
    """
    for _ in range(scrolls):
        # Randomized distance and pause so the steps are not uniform.
        await page.mouse.wheel(0, random.randint(min_delta, max_delta))
        await page.wait_for_timeout(random.randint(min_pause_ms, max_pause_ms))
async def scrape_with_auth(email: str, password: str):
    """Log in (reusing a saved session when available) and read page state.

    Opens a visible Chromium window, loads ``auth_state.json`` into the
    context if that file exists, and visits the login page. When no
    ``.dashboard`` element is present, the login form is submitted and the
    resulting session is written back to ``auth_state.json``.

    Args:
        email: Value typed into the ``#email`` field.
        password: Value typed into the ``#password`` field.

    Returns:
        Whatever ``window.__PRELOADED_STATE__`` holds on the current page.
    """
    # Imported here because the module's visible imports do not include it.
    from pathlib import Path

    state_file = "auth_state.json"

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        try:
            # Reuse the saved session so a fresh login is not needed each run.
            context = await browser.new_context(
                storage_state=state_file if Path(state_file).exists() else None
            )
            page = await context.new_page()

            await page.goto("https://example.com/login")

            # Check whether we are already logged in.
            if await page.locator(".dashboard").count() == 0:
                await page.fill("#email", email)
                await page.fill("#password", password)
                await page.click("[type=submit]")
                await page.wait_for_url("**/dashboard")

                # Persist the session for the next run.
                await context.storage_state(path=state_file)

            # Continue scraping...
            data = await page.evaluate("() => window.__PRELOADED_STATE__")
        finally:
            # Close the browser window even if login or navigation fails.
            await browser.close()

    return data
async def scrape_multiple_pages(
    urls: list[str], max_concurrency: int = 5
) -> list[dict]:
    """Scrape several URLs concurrently with one shared headless browser.

    Args:
        urls: Pages to visit. An empty list returns ``[]`` without launching
            a browser.
        max_concurrency: Maximum number of pages open at the same time.

    Returns:
        The per-URL results of ``extract_data``, in the same order as *urls*.

    Raises:
        ValueError: If *max_concurrency* is smaller than 1.
        Exception: The first error raised while scraping any URL; remaining
            scrapes are cancelled before it propagates.
    """
    if max_concurrency < 1:
        # A zero-sized semaphore would block every task forever.
        raise ValueError("max_concurrency must be at least 1")
    if not urls:
        return []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        # Cap how many pages are open simultaneously.
        semaphore = asyncio.Semaphore(max_concurrency)

        async def scrape_one(url: str):
            """Open *url* in its own page, extract its data, then close the page."""
            async with semaphore:
                page = await browser.new_page()
                try:
                    await page.goto(url, timeout=30000)
                    # NOTE(review): extract_data is not defined in the visible
                    # source; it must be provided elsewhere in the module.
                    return await extract_data(page)
                finally:
                    await page.close()

        tasks = [asyncio.create_task(scrape_one(url)) for url in urls]
        try:
            return await asyncio.gather(*tasks)
        except BaseException:
            # One scrape failed: cancel the rest and let them finish closing
            # their pages before the browser goes away.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
            raise
        finally:
            await browser.close()
Sebelum scraping, cek:
- `yourtarget.com/robots.txt`

Scraping untuk riset pasar, price monitoring, dan business intelligence umumnya acceptable asal tidak berlebihan.
Butuh script scraping custom untuk bisnis Anda? Hubungi kami.
Tips teknologi & bisnis mingguan
Bergabung dengan 2,500+ subscriber yang mendapatkan insight teknologi, tutorial development, dan tips bisnis digital langsung ke inbox mereka setiap minggu.
Dapatkan tips & insight teknologi terbaru langsung ke inbox Anda.
© 2026 PT Digital Uptime Teknologi Informasi. Hak cipta dilindungi.