New skill - go-rod-master. Browser automation with Golang (#83)

* New skill - go-rod-master. A fairly large skill for browser automation with Go and go-rod.

* chore: sync generated registry files

---------

Co-authored-by: 8hoursking <user@MacBook-Pro-user.local>
This commit is contained in:
8hoursking
2026-02-16 09:20:43 +03:00
committed by GitHub
parent 2382b7439c
commit 37349607ae
9 changed files with 1021 additions and 3 deletions

View File

@@ -0,0 +1,41 @@
package main
import (
"fmt"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/input"
)
// main runs the basic_scrape example, a minimal go-rod scraping workflow:
// launch browser → navigate → extract text → close.
func main() {
	// Connect to a freshly launched browser instance.
	// Rod auto-downloads Chromium if not present.
	b := rod.New().Timeout(time.Minute).MustConnect() // global timeout for the browser
	defer b.MustClose()

	// Open the target page, then block until it has stabilized.
	p := b.MustPage("https://github.com")
	p.MustWaitStable()

	// Read the page title by evaluating JavaScript on the <title> element.
	titleEl := p.MustElement("title")
	title := titleEl.MustEval(`() => this.innerText`).String()
	fmt.Println("Page title:", title)

	// Count every anchor that carries an href attribute (CSS selector lookup).
	anchors := p.MustElements("a[href]")
	fmt.Printf("Found %d links on the page\n", len(anchors))

	// Press "/" — the keyboard shortcut that triggers search.
	p.Keyboard.MustType(input.Slash)

	// Fill the search input and submit it with Enter.
	searchBox := p.MustElement("#query-builder-test")
	searchBox.MustInput("go-rod").MustType(input.Enter)

	// Wait for results — MustElementR matches by CSS selector + text regex.
	match := p.MustElementR("span", "DevTools Protocol")
	fmt.Println("Found result:", match.MustText())
}

View File

@@ -0,0 +1,81 @@
package main
import (
"fmt"
"sync"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/stealth"
)
// main runs the concurrent_pages example: it scrapes a fixed list of URLs
// concurrently through a rod page pool whose pages are stealth-enabled, then
// prints a summary of the collected page titles.
func main() {
	browser := rod.New().
		Timeout(2 * time.Minute).
		MustConnect()
	defer browser.MustClose()

	// URLs to scrape concurrently.
	urls := []string{
		"https://example.com",
		"https://example.org",
		"https://www.iana.org/domains/reserved",
		"https://www.iana.org/about",
	}

	// Create a page pool with max 3 concurrent pages.
	pool := rod.NewPagePool(3)

	// Factory function: creates stealth-enabled pages. MustIncognito gives
	// each page an isolated browser context (separate cookies, storage).
	create := func() *rod.Page {
		return stealth.MustPage(browser.MustIncognito())
	}

	// results is written from several goroutines, so every access made while
	// the workers are running is guarded by mu.
	var mu sync.Mutex
	results := make(map[string]string, len(urls))

	// Scrape all URLs concurrently, one goroutine per URL.
	var wg sync.WaitGroup
	for _, url := range urls {
		wg.Add(1)
		go func(u string) {
			defer wg.Done()

			// Get a page from the pool (blocks if pool is full).
			page := pool.MustGet(create)
			defer pool.Put(page) // return page to pool when done

			// Navigate and wait for the page to stabilize.
			page.MustNavigate(u).MustWaitStable()

			// Extract the page title.
			title := page.MustInfo().Title

			mu.Lock()
			results[u] = title
			mu.Unlock()

			fmt.Printf("[done] %s → %s\n", u, title)
		}(url)
	}

	// Wait for all goroutines to complete.
	wg.Wait()

	// Clean up the pool by closing every page it still holds.
	pool.Cleanup(func(p *rod.Page) {
		p.MustClose()
	})

	// Print the summary in the order the URLs were listed. Ranging over the
	// map directly would produce a different order on every run.
	fmt.Printf("\n--- Results (%d pages scraped) ---\n", len(results))
	for _, u := range urls {
		if title, ok := results[u]; ok {
			fmt.Printf(" %s: %s\n", u, title)
		}
	}
}

View File

@@ -0,0 +1,85 @@
package main
import (
"fmt"
"net/http"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/proto"
"github.com/go-rod/stealth"
)
// main runs the request_hijacking example: it intercepts and modifies network
// requests using Rod's HijackRequests API, then logs network responses
// received by a second page.
func main() {
	browser := rod.New().
		Timeout(time.Minute).
		MustConnect()
	defer browser.MustClose()

	// --- Example 1: Block image requests to save bandwidth ---
	router := browser.HijackRequests()
	defer router.MustStop()

	// Block all PNG and JPEG image requests with the same handler.
	blockRequest := func(ctx *rod.Hijack) {
		ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient)
	}
	for _, pattern := range []string{"*.png", "*.jpg"} {
		router.MustAdd(pattern, blockRequest)
	}

	// Modify request headers for API calls.
	router.MustAdd("*api.*", func(ctx *rod.Hijack) {
		ctx.Request.Req().Header.Set("X-Custom-Header", "go-rod")
		ctx.Request.Req().Header.Set("Authorization", "Bearer my-token")

		// Load the actual response from the server.
		if err := ctx.LoadResponse(http.DefaultClient, true); err != nil {
			fmt.Printf("Failed to load response: %v\n", err)
			return
		}
		fmt.Printf("API response status: %d\n", ctx.Response.Payload().ResponseCode)
	})

	// Inject JavaScript into every JS file loaded.
	router.MustAdd("*.js", func(ctx *rod.Hijack) {
		if err := ctx.LoadResponse(http.DefaultClient, true); err != nil {
			// Report the failure instead of dropping it silently.
			fmt.Printf("Failed to load script response: %v\n", err)
			return
		}
		// Append tracking code to all JavaScript files.
		body := ctx.Response.Body()
		ctx.Response.SetBody(body + "\n// Monitored by go-rod")
	})

	// IMPORTANT: Start the router in a goroutine.
	go router.Run()

	// Use stealth page for anti-detection.
	page := stealth.MustPage(browser)
	page.MustNavigate("https://example.com").MustWaitLoad()
	fmt.Println("Page loaded with request hijacking active")
	fmt.Println("Title:", page.MustElement("title").MustText())

	// --- Example 2: Capture and log all network requests ---
	// (Using a separate page to show different patterns)
	page2 := stealth.MustPage(browser)

	// Enable network domain for request logging; stop if the call fails
	// rather than discarding the error.
	if err := (proto.NetworkEnable{}).Call(page2); err != nil {
		fmt.Printf("Failed to enable network domain: %v\n", err)
		return
	}

	// Listen for network responses. EachEvent is called here, before the
	// navigation below starts; only the function it returns runs in the new
	// goroutine.
	go page2.EachEvent(func(e *proto.NetworkResponseReceived) {
		// e.Type is handed to %s directly; no explicit String() call is needed.
		fmt.Printf(" [%d] %s %s\n",
			e.Response.Status,
			e.Type,
			e.Response.URL,
		)
	})()

	page2.MustNavigate("https://example.com").MustWaitLoad()
	fmt.Println("\nNetwork log above shows all requests captured")
}

View File

@@ -0,0 +1,91 @@
package main
import (
"fmt"
"strings"
"time"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/utils"
"github.com/go-rod/stealth"
)
// main runs the stealth_page example: it uses go-rod/stealth to bypass bot
// detection, creating a stealth-enabled page and checking it against a
// detection site. It then shows two variations: injecting stealth.JS manually
// and creating/using a page with explicit error handling.
func main() {
	// Ensure the browser binary is downloaded.
	launcher.NewBrowser().MustGet()

	// Launch browser with custom launcher settings.
	controlURL := launcher.New().
		Headless(true).
		MustLaunch()

	browser := rod.New().
		ControlURL(controlURL).
		Timeout(time.Minute).
		MustConnect()
	defer browser.MustClose()

	// CRITICAL: Use stealth.MustPage instead of browser.MustPage.
	// This injects anti-detection JavaScript into every new document.
	page := stealth.MustPage(browser)

	// Navigate to a bot detection test page.
	page.MustNavigate("https://bot.sannysoft.com")

	// Wait for the detection tests to complete: the element only matches
	// this selector once it carries the "passed" class.
	page.MustElement("#broken-image-dimensions.passed")

	// Take a screenshot to verify results.
	page.MustScreenshot("stealth_result.png")
	fmt.Println("Screenshot saved to stealth_result.png")

	// Print detection results.
	printBotDetectionReport(page)

	// ---- Advanced: Using stealth.JS directly ----
	// If you need to create the page manually (e.g., with specific context),
	// you can inject stealth.JS via EvalOnNewDocument:
	advancedPage := browser.MustPage()
	advancedPage.MustEvalOnNewDocument(stealth.JS)
	advancedPage.MustNavigate("https://bot.sannysoft.com")
	advancedPage.MustElement("#broken-image-dimensions.passed")
	fmt.Println("\nAdvanced stealth page also passed detection tests")

	// ---- Production: Error handling pattern ----
	// Every step uses the error-returning variant rather than a Must* helper,
	// so a failure is reported instead of panicking.
	prodPage, err := stealth.Page(browser)
	if err != nil {
		fmt.Printf("Failed to create stealth page: %v\n", err)
		return
	}
	if err := prodPage.Navigate("https://example.com"); err != nil {
		fmt.Printf("Failed to navigate: %v\n", err)
		return
	}
	titleEl, err := prodPage.Element("title")
	if err != nil {
		fmt.Printf("Failed to find title element: %v\n", err)
		return
	}
	title, err := titleEl.Text()
	if err != nil {
		fmt.Printf("Failed to get title: %v\n", err)
		return
	}
	fmt.Printf("\nProduction page title: %s\n", title)
}
// printBotDetectionReport extracts and prints the detection test results.
//
// It locates the "#broken-image-dimensions.passed" element on page, walks the
// rows (from the second row on) of the table that contains it, and prints one
// line per row:
//   - the "User Agent" row is printed as a boolean that is true when the
//     reported value does not contain "HeadlessChrome/";
//   - "Hairline Feature" rows are skipped;
//   - every other row is printed as "key: value".
//
// The Must* helpers panic if an element cannot be found.
func printBotDetectionReport(page *rod.Page) {
	el := page.MustElement("#broken-image-dimensions.passed")
	rows := el.MustParents("table").First().MustElements("tr:nth-child(n+2)")

	for _, row := range rows {
		cells := row.MustElements("td")
		if len(cells) < 2 {
			// Not a key/value row; indexing cells below would panic.
			continue
		}

		key := cells[0].MustProperty("textContent")
		name := key.String()

		switch {
		case strings.HasPrefix(name, "User Agent"):
			ua := cells[1].MustProperty("textContent").String()
			passed := !strings.Contains(ua, "HeadlessChrome/")
			fmt.Printf(" %s: %t\n", key, passed)
		case strings.HasPrefix(name, "Hairline Feature"):
			// machine-dependent, skip
		default:
			fmt.Printf(" %s: %s\n", key, cells[1].MustProperty("textContent"))
		}
	}

	// Deliberately do not write to stealth_result.png here: the caller saves a
	// screenshot to that path, and writing an empty payload would clobber it.
	// The blank reference only keeps the file's utils import in use.
	_ = utils.OutputFile
}