New skill - go-rod-master. Browser automation with Golang (#83)
* New skill - go-rod-master. Pretty big skill for browser automation with go and go-rod. * chore: sync generated registry files --------- Co-authored-by: 8hoursking <user@MacBook-Pro-user.local>
This commit is contained in:
41
skills/go-rod-master/examples/basic_scrape.go
Normal file
41
skills/go-rod-master/examples/basic_scrape.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
"github.com/go-rod/rod/lib/input"
|
||||
)
|
||||
|
||||
// basic_scrape demonstrates a minimal go-rod scraping workflow:
|
||||
// Launch browser → navigate → extract text → close.
|
||||
func main() {
|
||||
// Launch and connect to a new browser instance.
|
||||
// Rod auto-downloads Chromium if not present.
|
||||
browser := rod.New().
|
||||
Timeout(time.Minute). // global timeout for the browser
|
||||
MustConnect()
|
||||
defer browser.MustClose()
|
||||
|
||||
// Navigate to the target page and wait for it to stabilize
|
||||
page := browser.MustPage("https://github.com").MustWaitStable()
|
||||
|
||||
// Extract the page title via JavaScript evaluation
|
||||
title := page.MustElement("title").MustEval(`() => this.innerText`).String()
|
||||
fmt.Println("Page title:", title)
|
||||
|
||||
// Use CSS selector to find elements
|
||||
links := page.MustElements("a[href]")
|
||||
fmt.Printf("Found %d links on the page\n", len(links))
|
||||
|
||||
// Use keyboard shortcut to trigger search
|
||||
page.Keyboard.MustType(input.Slash)
|
||||
|
||||
// Type into the search input and press Enter
|
||||
page.MustElement("#query-builder-test").MustInput("go-rod").MustType(input.Enter)
|
||||
|
||||
// Wait for results — MustElementR matches by CSS selector + text regex
|
||||
result := page.MustElementR("span", "DevTools Protocol").MustText()
|
||||
fmt.Println("Found result:", result)
|
||||
}
|
||||
81
skills/go-rod-master/examples/concurrent_pages.go
Normal file
81
skills/go-rod-master/examples/concurrent_pages.go
Normal file
@@ -0,0 +1,81 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
"github.com/go-rod/stealth"
|
||||
)
|
||||
|
||||
// concurrent_pages demonstrates using rod.PagePool for concurrent scraping
|
||||
// with stealth-enabled pages.
|
||||
func main() {
|
||||
browser := rod.New().
|
||||
Timeout(2 * time.Minute).
|
||||
MustConnect()
|
||||
defer browser.MustClose()
|
||||
|
||||
// URLs to scrape concurrently
|
||||
urls := []string{
|
||||
"https://example.com",
|
||||
"https://example.org",
|
||||
"https://www.iana.org/domains/reserved",
|
||||
"https://www.iana.org/about",
|
||||
}
|
||||
|
||||
// Create a page pool with max 3 concurrent pages
|
||||
pool := rod.NewPagePool(3)
|
||||
|
||||
// Factory function: creates stealth-enabled pages in isolated incognito contexts
|
||||
create := func() *rod.Page {
|
||||
// MustIncognito creates an isolated browser context (separate cookies, storage)
|
||||
page := stealth.MustPage(browser.MustIncognito())
|
||||
return page
|
||||
}
|
||||
|
||||
// Collect results safely using a mutex
|
||||
var mu sync.Mutex
|
||||
results := make(map[string]string)
|
||||
|
||||
// Scrape all URLs concurrently
|
||||
var wg sync.WaitGroup
|
||||
for _, url := range urls {
|
||||
wg.Add(1)
|
||||
go func(u string) {
|
||||
defer wg.Done()
|
||||
|
||||
// Get a page from the pool (blocks if pool is full)
|
||||
page := pool.MustGet(create)
|
||||
defer pool.Put(page) // return page to pool when done
|
||||
|
||||
// Navigate and wait for the page to stabilize
|
||||
page.MustNavigate(u).MustWaitStable()
|
||||
|
||||
// Extract the page title
|
||||
title := page.MustInfo().Title
|
||||
|
||||
// Store result
|
||||
mu.Lock()
|
||||
results[u] = title
|
||||
mu.Unlock()
|
||||
|
||||
fmt.Printf("[done] %s → %s\n", u, title)
|
||||
}(url)
|
||||
}
|
||||
|
||||
// Wait for all goroutines to complete
|
||||
wg.Wait()
|
||||
|
||||
// Clean up the pool
|
||||
pool.Cleanup(func(p *rod.Page) {
|
||||
p.MustClose()
|
||||
})
|
||||
|
||||
// Print summary
|
||||
fmt.Printf("\n--- Results (%d pages scraped) ---\n", len(results))
|
||||
for url, title := range results {
|
||||
fmt.Printf(" %s: %s\n", url, title)
|
||||
}
|
||||
}
|
||||
85
skills/go-rod-master/examples/request_hijacking.go
Normal file
85
skills/go-rod-master/examples/request_hijacking.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
"github.com/go-rod/rod/lib/proto"
|
||||
"github.com/go-rod/stealth"
|
||||
)
|
||||
|
||||
// request_hijacking demonstrates intercepting and modifying network requests
|
||||
// using Rod's HijackRequests API.
|
||||
func main() {
|
||||
browser := rod.New().
|
||||
Timeout(time.Minute).
|
||||
MustConnect()
|
||||
defer browser.MustClose()
|
||||
|
||||
// --- Example 1: Block image requests to save bandwidth ---
|
||||
router := browser.HijackRequests()
|
||||
defer router.MustStop()
|
||||
|
||||
// Block all PNG and JPEG image requests
|
||||
router.MustAdd("*.png", func(ctx *rod.Hijack) {
|
||||
ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient)
|
||||
})
|
||||
router.MustAdd("*.jpg", func(ctx *rod.Hijack) {
|
||||
ctx.Response.Fail(proto.NetworkErrorReasonBlockedByClient)
|
||||
})
|
||||
|
||||
// Modify request headers for API calls
|
||||
router.MustAdd("*api.*", func(ctx *rod.Hijack) {
|
||||
ctx.Request.Req().Header.Set("X-Custom-Header", "go-rod")
|
||||
ctx.Request.Req().Header.Set("Authorization", "Bearer my-token")
|
||||
|
||||
// Load the actual response from the server
|
||||
if err := ctx.LoadResponse(http.DefaultClient, true); err != nil {
|
||||
fmt.Printf("Failed to load response: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("API response status: %d\n", ctx.Response.Payload().ResponseCode)
|
||||
})
|
||||
|
||||
// Inject JavaScript into every JS file loaded
|
||||
router.MustAdd("*.js", func(ctx *rod.Hijack) {
|
||||
if err := ctx.LoadResponse(http.DefaultClient, true); err != nil {
|
||||
return
|
||||
}
|
||||
// Append tracking code to all JavaScript files
|
||||
body := ctx.Response.Body()
|
||||
ctx.Response.SetBody(body + "\n// Monitored by go-rod")
|
||||
})
|
||||
|
||||
// IMPORTANT: Start the router in a goroutine
|
||||
go router.Run()
|
||||
|
||||
// Use stealth page for anti-detection
|
||||
page := stealth.MustPage(browser)
|
||||
page.MustNavigate("https://example.com").MustWaitLoad()
|
||||
|
||||
fmt.Println("Page loaded with request hijacking active")
|
||||
fmt.Println("Title:", page.MustElement("title").MustText())
|
||||
|
||||
// --- Example 2: Capture and log all network requests ---
|
||||
// (Using a separate page to show different patterns)
|
||||
page2 := stealth.MustPage(browser)
|
||||
|
||||
// Enable network domain for request logging
|
||||
proto.NetworkEnable{}.Call(page2)
|
||||
|
||||
// Listen for network responses
|
||||
go page2.EachEvent(func(e *proto.NetworkResponseReceived) {
|
||||
fmt.Printf(" [%d] %s %s\n",
|
||||
e.Response.Status,
|
||||
e.Type.String(),
|
||||
e.Response.URL,
|
||||
)
|
||||
})()
|
||||
|
||||
page2.MustNavigate("https://example.com").MustWaitLoad()
|
||||
fmt.Println("\nNetwork log above shows all requests captured")
|
||||
}
|
||||
91
skills/go-rod-master/examples/stealth_page.go
Normal file
91
skills/go-rod-master/examples/stealth_page.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
"github.com/go-rod/rod/lib/launcher"
|
||||
"github.com/go-rod/rod/lib/utils"
|
||||
"github.com/go-rod/stealth"
|
||||
)
|
||||
|
||||
// stealth_page demonstrates using go-rod/stealth to bypass bot detection.
|
||||
// It creates a stealth-enabled page and verifies evasions against a detection site.
|
||||
func main() {
|
||||
// Ensure the browser binary is downloaded
|
||||
launcher.NewBrowser().MustGet()
|
||||
|
||||
// Launch browser with custom launcher settings
|
||||
url := launcher.New().
|
||||
Headless(true).
|
||||
MustLaunch()
|
||||
|
||||
browser := rod.New().
|
||||
ControlURL(url).
|
||||
Timeout(time.Minute).
|
||||
MustConnect()
|
||||
defer browser.MustClose()
|
||||
|
||||
// CRITICAL: Use stealth.MustPage instead of browser.MustPage
|
||||
// This injects anti-detection JavaScript into every new document
|
||||
page := stealth.MustPage(browser)
|
||||
|
||||
// Navigate to a bot detection test page
|
||||
page.MustNavigate("https://bot.sannysoft.com")
|
||||
|
||||
// Wait for the detection tests to complete
|
||||
page.MustElement("#broken-image-dimensions.passed")
|
||||
|
||||
// Take a screenshot to verify results
|
||||
page.MustScreenshot("stealth_result.png")
|
||||
fmt.Println("Screenshot saved to stealth_result.png")
|
||||
|
||||
// Print detection results
|
||||
printBotDetectionReport(page)
|
||||
|
||||
// ---- Advanced: Using stealth.JS directly ----
|
||||
// If you need to create the page manually (e.g., with specific context),
|
||||
// you can inject stealth.JS via EvalOnNewDocument:
|
||||
advancedPage := browser.MustPage()
|
||||
advancedPage.MustEvalOnNewDocument(stealth.JS)
|
||||
advancedPage.MustNavigate("https://bot.sannysoft.com")
|
||||
advancedPage.MustElement("#broken-image-dimensions.passed")
|
||||
fmt.Println("\nAdvanced stealth page also passed detection tests")
|
||||
|
||||
// ---- Production: Error handling pattern ----
|
||||
prodPage, err := stealth.Page(browser)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to create stealth page: %v\n", err)
|
||||
return
|
||||
}
|
||||
prodPage.MustNavigate("https://example.com")
|
||||
title, err := prodPage.MustElement("title").Text()
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to get title: %v\n", err)
|
||||
return
|
||||
}
|
||||
fmt.Printf("\nProduction page title: %s\n", title)
|
||||
}
|
||||
|
||||
// printBotDetectionReport extracts and prints the detection test results.
|
||||
func printBotDetectionReport(page *rod.Page) {
|
||||
el := page.MustElement("#broken-image-dimensions.passed")
|
||||
for _, row := range el.MustParents("table").First().MustElements("tr:nth-child(n+2)") {
|
||||
cells := row.MustElements("td")
|
||||
key := cells[0].MustProperty("textContent")
|
||||
|
||||
if strings.HasPrefix(key.String(), "User Agent") {
|
||||
ua := cells[1].MustProperty("textContent").String()
|
||||
passed := !strings.Contains(ua, "HeadlessChrome/")
|
||||
fmt.Printf(" %s: %t\n", key, passed)
|
||||
} else if strings.HasPrefix(key.String(), "Hairline Feature") {
|
||||
continue // machine-dependent, skip
|
||||
} else {
|
||||
fmt.Printf(" %s: %s\n", key, cells[1].MustProperty("textContent"))
|
||||
}
|
||||
}
|
||||
|
||||
_ = utils.OutputFile("stealth_result.png", []byte{})
|
||||
}
|
||||
Reference in New Issue
Block a user