Kembali ke Blog
Tutorial#python#web scraping#playwright#automation#tutorial

Web Scraping Modern dengan Python dan Playwright di 2026

Selenium sudah terlalu tua. Playwright adalah tools scraping modern yang lebih cepat, lebih stabil, dan bisa handle JavaScript-heavy sites. Ini panduan lengkap dari setup sampai production.

Muhamad Putra Aulia Hidayat

Muhamad Putra Aulia Hidayat

20 Maret 2026 · 3 menit baca

Web Scraping Modern dengan Python Playwright

Web scraping di 2026 sudah lebih kompleks — hampir semua website pakai JavaScript, banyak yang punya anti-bot protection. Playwright adalah jawaban untuk semua tantangan ini.

Kenapa Playwright, Bukan Selenium?

| Fitur | Playwright | Selenium |
|---|---|---|
| Speed | 2-3x lebih cepat | Lambat |
| Stability | Sangat stabil | Sering flaky |
| Async support | Native | Butuh wrapper |
| Auto-wait | Ya, otomatis | Manual |
| Multiple browsers | Chromium, Firefox, WebKit | Chromium, Firefox |
| Screenshot/PDF | Built-in | Plugin |

Setup

uv init scraper
uv add playwright
uv run playwright install chromium

Scraping Dasar

import asyncio
from playwright.async_api import async_playwright

async def scrape_tokopedia(keyword: str) -> list[dict]:
    """Search Tokopedia for *keyword* and return the product cards found.

    Args:
        keyword: Free-text search term. It is URL-encoded before being
            placed in the query string, so spaces and characters such as
            ``&`` or ``#`` cannot corrupt the request URL.

    Returns:
        One dict per product card with the keys ``name``, ``price``,
        ``shop`` and ``rating``, each taken from the ``innerText`` of the
        matching element inside the card.
        NOTE(review): a card lacking one of those elements presumably
        yields ``None`` for that key -- confirm against Playwright's
        serialization of ``undefined``.

    Raises:
        Whatever Playwright raises when navigation fails or no product
        card appears within the 10-second selector wait; the browser is
        closed before the exception propagates.
    """
    # Local import keeps this snippet self-contained.
    from urllib.parse import quote_plus

    search_url = (
        "https://www.tokopedia.com/search"
        f"?st=product&q={quote_plus(keyword)}"
    )

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,  # True for production
            args=["--no-sandbox"],
        )
        try:
            page = await browser.new_page()

            # Abort image and font requests to speed up page loads.
            await page.route(
                "**/*.{png,jpg,jpeg,gif,webp,svg,woff,woff2}",
                lambda route: route.abort(),
            )

            await page.goto(search_url)

            # Wait until at least one product card is present.
            await page.wait_for_selector(
                "[data-testid=master-product-card]", timeout=10000
            )

            # Extract the fields inside the page in a single round trip.
            return await page.evaluate("""
            () => {
                const cards = document.querySelectorAll("[data-testid=master-product-card]")
                return Array.from(cards).map(card => ({
                    name: card.querySelector("[data-testid=linkProductName]")?.innerText,
                    price: card.querySelector("[data-testid=linkProductPrice]")?.innerText,
                    shop: card.querySelector("[data-testid=linkProductShopName]")?.innerText,
                    rating: card.querySelector("[data-testid=lblRating]")?.innerText,
                }))
            }
        """)
        finally:
            # Close the browser on both the success and the error path.
            await browser.close()

# Run the basic scraper once for a sample keyword and report how many
# product cards came back.
search_keyword = "laptop gaming"
result = asyncio.run(scrape_tokopedia(search_keyword))
print(f"Ditemukan {len(result)} produk")

Handle Anti-Bot Protection

from playwright.async_api import async_playwright
import random

async def get_stealth_browser():
    """Launch headless Chromium configured to look less like automation.

    The browser is started with the ``AutomationControlled`` blink feature
    disabled, and the context uses a desktop viewport, a fixed Chrome
    user agent, the ``id-ID`` locale and the ``Asia/Jakarta`` timezone.
    An init script makes ``navigator.webdriver`` read as ``undefined`` in
    every page of the context.

    Returns:
        A ``(browser, context)`` tuple.

    Note:
        Playwright is started with ``.start()`` and its handle is not part
        of the return value, so this function cannot stop it on the
        success path. Callers should at least ``await browser.close()``
        when done.
    """
    p = await async_playwright().start()

    try:
        browser = await p.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-blink-features=AutomationControlled",
                "--disable-infobars",
            ],
        )

        context = await browser.new_context(
            viewport={"width": 1920, "height": 1080},
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
            locale="id-ID",
            timezone_id="Asia/Jakarta",
        )

        # Inject a script that hides the automation flag from page scripts.
        await context.add_init_script("""
        Object.defineProperty(navigator, 'webdriver', { get: () => undefined })
    """)
    except BaseException:
        # Setup failed, so nothing is handed to the caller: stop the
        # Playwright instance started above, then re-raise unchanged.
        await p.stop()
        raise

    return browser, context

async def human_scroll(page, scrolls: int = 3):
    """Scroll the page in irregular steps with random pauses, like a person.

    Args:
        page: Object exposing ``mouse.wheel(dx, dy)`` and
            ``wait_for_timeout(ms)`` as awaitables.
        scrolls: How many wheel movements to perform.
    """
    for _step in range(scrolls):
        # Vertical distance for this wheel movement, in the 300-700 range.
        wheel_delta = random.randint(300, 700)
        await page.mouse.wheel(0, wheel_delta)

        # Pause between 0.5 and 1.5 seconds before the next movement.
        pause_ms = random.randint(500, 1500)
        await page.wait_for_timeout(pause_ms)

Scraping dengan Login

async def scrape_with_auth(email: str, password: str):
    """Log in (reusing a saved session when available) and read page state.

    A previously saved ``auth_state.json`` in the working directory is
    loaded into the browser context if it exists. When the login page
    shows no ``.dashboard`` element, the form is filled and submitted and
    the resulting session is written back to ``auth_state.json``.

    Args:
        email: Value typed into the ``#email`` field.
        password: Value typed into the ``#password`` field.

    Returns:
        The value of ``window.__PRELOADED_STATE__`` evaluated in the page.
    """
    # Local import: Path is used below but is not imported at file level.
    from pathlib import Path

    state_file = "auth_state.json"

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        try:
            # Reuse the stored session so a fresh login is not needed.
            context = await browser.new_context(
                storage_state=state_file if Path(state_file).exists() else None
            )

            page = await context.new_page()
            await page.goto("https://example.com/login")

            # No dashboard element means we are not logged in yet.
            if await page.locator(".dashboard").count() == 0:
                await page.fill("#email", email)
                await page.fill("#password", password)
                await page.click("[type=submit]")
                await page.wait_for_url("**/dashboard")

                # Persist the session for the next run.
                await context.storage_state(path=state_file)

            # Continue scraping from the authenticated page.
            return await page.evaluate("() => window.__PRELOADED_STATE__")
        finally:
            # Close the browser on both the success and the error path.
            await browser.close()

Concurrent Scraping

async def scrape_multiple_pages(
    urls: list[str], *, max_concurrency: int = 5
) -> list[dict]:
    """Scrape several URLs concurrently with one shared browser.

    Args:
        urls: Pages to visit. An empty list returns ``[]`` without
            launching a browser.
        max_concurrency: Maximum number of pages open at the same time
            (default 5, the previously hard-coded limit).

    Returns:
        The ``extract_data(page)`` result for each URL, in the same order
        as *urls*.

    Raises:
        ValueError: If *max_concurrency* is smaller than 1.
        Exception: The first error raised by any page task propagates
            unchanged; the browser is closed before it does.
    """
    if max_concurrency < 1:
        # A zero-slot semaphore would block every task forever.
        raise ValueError("max_concurrency must be at least 1")
    if not urls:
        return []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        # Caps how many pages are open simultaneously.
        semaphore = asyncio.Semaphore(max_concurrency)

        async def scrape_one(url: str):
            async with semaphore:
                page = await browser.new_page()
                try:
                    await page.goto(url, timeout=30000)
                    return await extract_data(page)
                finally:
                    # Always release the page, even when scraping fails.
                    await page.close()

        try:
            return await asyncio.gather(*(scrape_one(url) for url in urls))
        finally:
            # Close the browser on both the success and the error path.
            await browser.close()

Legal dan Etika Scraping

Sebelum scraping, cek:

  1. robots.txt — yourtarget.com/robots.txt
  2. Terms of Service — cari kata "scraping", "crawling", "automated"
  3. Rate limit — jangan hammer server, tambah delay antar request
  4. Data sensitif — jangan scrape data personal tanpa izin

Scraping untuk riset pasar, price monitoring, dan business intelligence umumnya acceptable asal tidak berlebihan.

Butuh script scraping custom untuk bisnis Anda? Hubungi kami.

python · web scraping · playwright · automation · tutorial

Newsletter Digital Uptime

Tips teknologi & bisnis mingguan

Bergabung dengan 2,500+ subscriber yang mendapatkan insight teknologi, tutorial development, dan tips bisnis digital langsung ke inbox mereka setiap minggu.

Tidak ada spam. Unsubscribe kapan saja.

Artikel Terkait

Kami menggunakan cookies untuk meningkatkan pengalaman Anda di website ini. Dengan melanjutkan, Anda menyetujui penggunaan cookies sesuai Kebijakan Privasi kami.