Compare commits

..

1 Commits

Author SHA1 Message Date
UncleCode
6029097114 feat: add VNC streaming support 2025-05-17 19:12:15 +08:00
6 changed files with 101 additions and 6 deletions

View File

@@ -43,9 +43,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libjpeg-dev \
redis-server \
supervisor \
&& apt-get clean \
xvfb \
x11vnc \
fluxbox \
websockify \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install noVNC for browser-based VNC access
# The noVNC web client is cloned to /opt/novnc and websockify into its utils/websockify directory.
# NOTE(review): both clones take the default branch head (--depth 1, no tag or commit pin),
# so rebuilding the image can pull different code — consider pinning a release tag.
# NOTE(review): assumes git is already installed by an earlier layer — TODO confirm.
RUN git clone --depth 1 https://github.com/novnc/noVNC /opt/novnc \
&& git clone --depth 1 https://github.com/novnc/websockify /opt/novnc/utils/websockify
RUN apt-get update && apt-get install -y --no-install-recommends \
libglib2.0-0 \
libnss3 \

View File

@@ -17,6 +17,7 @@
- [Screenshot Endpoint](#screenshot-endpoint)
- [PDF Export Endpoint](#pdf-export-endpoint)
- [JavaScript Execution Endpoint](#javascript-execution-endpoint)
- [Browser VNC Endpoint](#browser-vnc-endpoint)
- [Library Context Endpoint](#library-context-endpoint)
- [MCP (Model Context Protocol) Support](#mcp-model-context-protocol-support)
- [What is MCP?](#what-is-mcp)
@@ -377,6 +378,20 @@ Executes JavaScript snippets on the specified URL and returns the full crawl res
- `scripts`: List of JavaScript snippets to execute sequentially
### Browser VNC Endpoint
```
GET /vnc
```
Opens a browser-based VNC session for interacting with the container's desktop environment. Use `/vnc/url` to retrieve only the iframe URL.
```
GET /vnc/url
```
Returns a JSON object of the form `{"url": "<base-url>/novnc/vnc.html?autoconnect=true&resize=scale"}`, where `url` is the address of the embedded noVNC client.
---
## Dockerfile Parameters

View File

@@ -33,7 +33,11 @@ from schemas import (
)
from utils import (
FilterType, load_config, setup_logging, verify_email_domain
FilterType,
load_config,
setup_logging,
verify_email_domain,
get_base_url,
)
import os
import sys
@@ -48,7 +52,11 @@ from fastapi import (
)
from rank_bm25 import BM25Okapi
from fastapi.responses import (
StreamingResponse, RedirectResponse, PlainTextResponse, JSONResponse
StreamingResponse,
RedirectResponse,
PlainTextResponse,
JSONResponse,
HTMLResponse,
)
from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
@@ -129,11 +137,31 @@ app.mount(
name="play",
)
# Expose the noVNC web client under /novnc, but only when its directory is present
VNC_DIR = pathlib.Path("/opt/novnc")
if VNC_DIR.exists():
    novnc_static = StaticFiles(directory=VNC_DIR, html=True)
    app.mount("/novnc", novnc_static, name="novnc")
@app.get("/")
async def root():
    """Send requests for the site root to ``/playground``."""
    playground_redirect = RedirectResponse("/playground")
    return playground_redirect
@app.get("/vnc")
async def vnc_page(request: Request):
    """Return a minimal HTML page that embeds the noVNC client in an iframe.

    The iframe targets ``/novnc/vnc.html`` under the base URL reported by
    ``get_base_url(request)``, with ``autoconnect=true`` and ``resize=scale``,
    and is rendered at 1024x768 without a border.
    """
    # Function-scope import so the handler does not depend on a new
    # module-level import being present.
    from html import escape

    url = f"{get_base_url(request)}/novnc/vnc.html?autoconnect=true&resize=scale"
    # The base URL comes from the incoming request (presumably its scheme and
    # host), so it must not be pasted raw into markup. Escaping also turns the
    # query-string "&" into "&amp;", which is the valid form inside an HTML
    # attribute; browsers decode it back to the same URL.
    safe_url = escape(url, quote=True)
    html = f"<iframe src='{safe_url}' width='1024' height='768' style='border:none'></iframe>"
    return HTMLResponse(f"<html><body>{html}</body></html>")
@app.get("/vnc/url")
async def vnc_url(request: Request):
    """Return the noVNC client address as a ``{"url": ...}`` mapping."""
    base = get_base_url(request)
    return {"url": f"{base}/novnc/vnc.html?autoconnect=true&resize=scale"}
# ─────────────────── infra / middleware ─────────────────────
# Redis client built from config["redis"]["uri"]; falls back to
# "redis://localhost" when that key is absent.
redis = aioredis.from_url(config["redis"].get("uri", "redis://localhost"))

View File

@@ -20,9 +20,53 @@ user=appuser ; Run gunicorn as our non-root user
autorestart=true
priority=20
; A section can hold only one effective environment= key, so both variables are set in a single comma-separated list.
environment=PYTHONUNBUFFERED=1,DISPLAY=":99" ; Unbuffered Python output goes straight to logs; DISPLAY matches the Xvfb display (:99)
stdout_logfile=/dev/stdout ; Redirect gunicorn stdout to container stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr ; Redirect gunicorn stderr to container stderr
stderr_logfile_maxbytes=0
[program:xvfb]
; Virtual X server on display :99 with a single 1280x720 screen at 24-bit depth.
; The other programs in this file reference the same display via DISPLAY=:99.
command=/usr/bin/Xvfb :99 -screen 0 1280x720x24
user=appuser
autorestart=true
; Lowest priority value among the VNC programs (5 < 6 < 7 < 8), so it is ordered first.
priority=5
; Log to the container's stdout/stderr; maxbytes=0 disables log rotation.
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:fluxbox]
; Fluxbox window manager running on the Xvfb display (:99).
; NOTE(review): supervisor orders by priority but presumably does not wait for Xvfb
; to be ready; autorestart=true covers an early failure — confirm this is acceptable.
command=/usr/bin/fluxbox
user=appuser
autorestart=true
priority=6
environment=DISPLAY=:99
; Log to the container's stdout/stderr; maxbytes=0 disables log rotation.
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:x11vnc]
; VNC server exporting display :99 on TCP port 5900; -forever keeps it running after
; clients disconnect and -shared allows several viewers at once.
; NOTE(review): -nopw means no VNC password. Anyone who can reach port 5900, or the
; websockify bridge on 6080, gets full control of the desktop — confirm these ports are
; not published, or add authentication / restrict the listener to localhost.
command=/usr/bin/x11vnc -display :99 -nopw -forever -shared -rfbport 5900 -quiet
user=appuser
autorestart=true
priority=7
environment=DISPLAY=:99
; Log to the container's stdout/stderr; maxbytes=0 disables log rotation.
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
[program:websockify]
; WebSocket-to-TCP bridge: listens on port 6080, forwards to the VNC server at
; localhost:5900, and serves the noVNC files from /opt/novnc via --web.
; NOTE(review): the API serves the noVNC page from its own /novnc mount, while the
; WebSocket endpoint lives here on port 6080 — verify the client loaded through /vnc
; is actually pointed at this port.
command=/usr/bin/websockify 6080 localhost:5900 --web /opt/novnc
user=appuser
autorestart=true
priority=8
; NOTE(review): DISPLAY is probably unused by websockify (it only proxies TCP) — confirm.
environment=DISPLAY=:99
; Log to the container's stdout/stderr; maxbytes=0 disables log rotation.
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
# Optional: Add filebeat or other logging agents here if needed

View File

@@ -137,7 +137,7 @@ if __name__ == "__main__":
- Higher → fewer chunks but more relevant.
- Lower → more inclusive.
> In more advanced scenarios, you might see parameters like `language`, `case_sensitive`, or `priority_tags` to refine how text is tokenized or weighted.
> In more advanced scenarios, you might see parameters like `use_stemming`, `case_sensitive`, or `priority_tags` to refine how text is tokenized or weighted.
---
@@ -242,4 +242,4 @@ class MyCustomFilter(RelevantContentFilter):
With these tools, you can **zero in** on the text that truly matters, ignoring spammy or boilerplate content, and produce a concise, relevant “fit markdown” for your AI or data pipelines. Happy pruning and searching!
- Last Updated: 2025-01-01
- Last Updated: 2025-01-01

View File

@@ -187,7 +187,7 @@ from crawl4ai import CrawlerRunConfig
bm25_filter = BM25ContentFilter(
user_query="machine learning",
bm25_threshold=1.2,
language="english"
use_stemming=True
)
md_generator = DefaultMarkdownGenerator(