Compare commits
1 Commits
devin/1748
...
codex/add-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6029097114 |
10
Dockerfile
10
Dockerfile
@@ -43,9 +43,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libjpeg-dev \
|
||||
redis-server \
|
||||
supervisor \
|
||||
&& apt-get clean \
|
||||
xvfb \
|
||||
x11vnc \
|
||||
fluxbox \
|
||||
websockify \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install noVNC for browser-based VNC access
|
||||
RUN git clone --depth 1 https://github.com/novnc/noVNC /opt/novnc \
|
||||
&& git clone --depth 1 https://github.com/novnc/websockify /opt/novnc/utils/websockify
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libglib2.0-0 \
|
||||
libnss3 \
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
- [Screenshot Endpoint](#screenshot-endpoint)
|
||||
- [PDF Export Endpoint](#pdf-export-endpoint)
|
||||
- [JavaScript Execution Endpoint](#javascript-execution-endpoint)
|
||||
- [Browser VNC Endpoint](#browser-vnc-endpoint)
|
||||
- [Library Context Endpoint](#library-context-endpoint)
|
||||
- [MCP (Model Context Protocol) Support](#mcp-model-context-protocol-support)
|
||||
- [What is MCP?](#what-is-mcp)
|
||||
@@ -377,6 +378,20 @@ Executes JavaScript snippets on the specified URL and returns the full crawl res
|
||||
|
||||
- `scripts`: List of JavaScript snippets to execute sequentially
|
||||
|
||||
### Browser VNC Endpoint
|
||||
|
||||
```
|
||||
GET /vnc
|
||||
```
|
||||
|
||||
Opens a browser-based VNC session for interacting with the container's desktop environment. Use `/vnc/url` to retrieve only the iframe URL.
|
||||
|
||||
```
|
||||
GET /vnc/url
|
||||
```
|
||||
|
||||
Returns a JSON object containing the URL of the embedded noVNC client.
|
||||
|
||||
---
|
||||
|
||||
## Dockerfile Parameters
|
||||
|
||||
@@ -33,7 +33,11 @@ from schemas import (
|
||||
)
|
||||
|
||||
from utils import (
|
||||
FilterType, load_config, setup_logging, verify_email_domain
|
||||
FilterType,
|
||||
load_config,
|
||||
setup_logging,
|
||||
verify_email_domain,
|
||||
get_base_url,
|
||||
)
|
||||
import os
|
||||
import sys
|
||||
@@ -48,7 +52,11 @@ from fastapi import (
|
||||
)
|
||||
from rank_bm25 import BM25Okapi
|
||||
from fastapi.responses import (
|
||||
StreamingResponse, RedirectResponse, PlainTextResponse, JSONResponse
|
||||
StreamingResponse,
|
||||
RedirectResponse,
|
||||
PlainTextResponse,
|
||||
JSONResponse,
|
||||
HTMLResponse,
|
||||
)
|
||||
from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
|
||||
from fastapi.middleware.trustedhost import TrustedHostMiddleware
|
||||
@@ -129,11 +137,31 @@ app.mount(
|
||||
name="play",
|
||||
)
|
||||
|
||||
# Serve the noVNC static files when they are present on disk.
# StaticFiles needs a directory: is_dir() (rather than exists()) keeps startup
# from failing with a RuntimeError if the path exists but is a regular file.
VNC_DIR = pathlib.Path("/opt/novnc")
if VNC_DIR.is_dir():
    app.mount("/novnc", StaticFiles(directory=VNC_DIR, html=True), name="novnc")
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Redirect requests for the site root to /playground."""
    return RedirectResponse(url="/playground")
|
||||
|
||||
|
||||
@app.get("/vnc")
async def vnc_page(request: Request):
    """Return a simple page embedding the noVNC client.

    The page is a minimal HTML document containing a single 1024x768 iframe
    that points at the noVNC ``vnc.html`` entry point under ``/novnc``, with
    auto-connect and scaled resizing enabled via query parameters.

    Args:
        request: The incoming request; passed to ``get_base_url`` to build
            the absolute URL of the noVNC client.

    Returns:
        An ``HTMLResponse`` wrapping the iframe markup.
    """
    # Function-scope import keeps this handler self-contained.
    from html import escape

    url = f"{get_base_url(request)}/novnc/vnc.html?autoconnect=true&resize=scale"
    # The base URL is derived from the request (presumably from client-supplied
    # headers such as Host -- confirm in get_base_url), so escape it before
    # placing it inside the single-quoted attribute. This also encodes the "&"
    # in the query string as "&amp;", which is the valid form inside HTML.
    iframe = (
        f"<iframe src='{escape(url, quote=True)}' width='1024' height='768' "
        "style='border:none'></iframe>"
    )
    return HTMLResponse(f"<html><body>{iframe}</body></html>")
|
||||
|
||||
|
||||
@app.get("/vnc/url")
async def vnc_url(request: Request):
    """Return a dict holding the noVNC client URL under the ``"url"`` key."""
    base_url = get_base_url(request)
    return {"url": f"{base_url}/novnc/vnc.html?autoconnect=true&resize=scale"}
|
||||
|
||||
# ─────────────────── infra / middleware ─────────────────────
|
||||
redis = aioredis.from_url(config["redis"].get("uri", "redis://localhost"))
|
||||
|
||||
|
||||
@@ -20,9 +20,53 @@ user=appuser ; Run gunicorn as our non-root user
|
||||
autorestart=true
|
||||
priority=20
|
||||
environment=PYTHONUNBUFFERED=1,DISPLAY=":99" ; One key only: a second environment= line would replace this one. Unbuffered Python output goes straight to logs; DISPLAY targets the Xvfb screen
|
||||
stdout_logfile=/dev/stdout ; Redirect gunicorn stdout to container stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr ; Redirect gunicorn stderr to container stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:xvfb]
|
||||
command=/usr/bin/Xvfb :99 -screen 0 1280x720x24
|
||||
user=appuser
|
||||
autorestart=true
|
||||
priority=5
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:fluxbox]
|
||||
command=/usr/bin/fluxbox
|
||||
user=appuser
|
||||
autorestart=true
|
||||
priority=6
|
||||
environment=DISPLAY=:99
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:x11vnc]
|
||||
command=/usr/bin/x11vnc -display :99 -nopw -forever -shared -rfbport 5900 -quiet
|
||||
user=appuser
|
||||
autorestart=true
|
||||
priority=7
|
||||
environment=DISPLAY=:99
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:websockify]
|
||||
command=/usr/bin/websockify 6080 localhost:5900 --web /opt/novnc
|
||||
user=appuser
|
||||
autorestart=true
|
||||
priority=8
|
||||
environment=DISPLAY=:99
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
# Optional: Add filebeat or other logging agents here if needed
|
||||
@@ -137,7 +137,7 @@ if __name__ == "__main__":
|
||||
- Higher → fewer chunks but more relevant.
|
||||
- Lower → more inclusive.
|
||||
|
||||
> In more advanced scenarios, you might see parameters like `language`, `case_sensitive`, or `priority_tags` to refine how text is tokenized or weighted.
|
||||
> In more advanced scenarios, you might see parameters like `use_stemming`, `case_sensitive`, or `priority_tags` to refine how text is tokenized or weighted.
|
||||
|
||||
---
|
||||
|
||||
@@ -242,4 +242,4 @@ class MyCustomFilter(RelevantContentFilter):
|
||||
|
||||
With these tools, you can **zero in** on the text that truly matters, ignoring spammy or boilerplate content, and produce a concise, relevant “fit markdown” for your AI or data pipelines. Happy pruning and searching!
|
||||
|
||||
- Last Updated: 2025-01-01
|
||||
- Last Updated: 2025-01-01
|
||||
@@ -187,7 +187,7 @@ from crawl4ai import CrawlerRunConfig
|
||||
bm25_filter = BM25ContentFilter(
|
||||
user_query="machine learning",
|
||||
bm25_threshold=1.2,
|
||||
language="english"
|
||||
use_stemming=True
|
||||
)
|
||||
|
||||
md_generator = DefaultMarkdownGenerator(
|
||||
|
||||
Reference in New Issue
Block a user