# docker-compose.traefik.yml — pullmd stack served behind a Traefik reverse proxy.
services:
  # Main application container. Traefik reaches it over the external `proxy`
  # network; it talks to the three sidecars below over `pullmd-internal`.
  pullmd:
    image: aeternalabshq/pullmd:latest
    container_name: pullmd
    restart: unless-stopped
    environment:
      # Falls back to https://<HOST_DOMAIN> when PUBLIC_URL is not set.
      - PUBLIC_URL=${PUBLIC_URL:-https://${HOST_DOMAIN}}
      # Sidecar endpoints, addressed by Compose service name.
      - TRAFILATURA_URL=http://trafilatura:8001/extract
      - PLAYWRIGHT_URL=http://playwright:8002/render
      - MARKITDOWN_URL=http://markitdown:8003/convert
      - MARKITDOWN_YOUTUBE=${MARKITDOWN_YOUTUBE:-}
      # Media tier (image captioning, audio transcription) — handled by pullmd, not the sidecar.
      - PULLMD_VISION_API_KEY=${PULLMD_VISION_API_KEY:-}
      - PULLMD_VISION_BASE_URL=${PULLMD_VISION_BASE_URL:-}
      - PULLMD_VISION_MODEL=${PULLMD_VISION_MODEL:-}
      - PULLMD_STT_API_KEY=${PULLMD_STT_API_KEY:-}
      - PULLMD_STT_BASE_URL=${PULLMD_STT_BASE_URL:-}
      - PULLMD_STT_MODEL=${PULLMD_STT_MODEL:-}
      - PULLMD_LLM_API_KEY=${PULLMD_LLM_API_KEY:-}
      - PULLMD_LLM_BASE_URL=${PULLMD_LLM_BASE_URL:-}
      # Opt-in PDF OCR tier (?pdf=ocr) — needs its OWN key, no PULLMD_LLM_* fallback.
      - PULLMD_PDF_OCR_API_KEY=${PULLMD_PDF_OCR_API_KEY:-}
      - PULLMD_PDF_OCR_BASE_URL=${PULLMD_PDF_OCR_BASE_URL:-}
      - PULLMD_PDF_OCR_MODEL=${PULLMD_PDF_OCR_MODEL:-}
      # Output shaping (v3): set PULLMD_SOURCE_HEADER=true to restore the legacy
      # "Source:" body header; PULLMD_FRONTMATTER_FIELDS limits frontmatter keys.
      - PULLMD_SOURCE_HEADER=${PULLMD_SOURCE_HEADER:-}
      - PULLMD_FRONTMATTER_FIELDS=${PULLMD_FRONTMATTER_FIELDS:-}
      # Outbound User-Agent overrides + custom site recipes (path inside the
      # container; ./data is mounted at /data).
      - PULLMD_USER_AGENT=${PULLMD_USER_AGENT:-}
      - PULLMD_UA_FEED_URL=${PULLMD_UA_FEED_URL:-}
      - PULLMD_SITE_RECIPES=${PULLMD_SITE_RECIPES:-}
      - REDDIT_CLIENT_ID=${REDDIT_CLIENT_ID:-}
      - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET:-}
      - REDDIT_USER_AGENT=${REDDIT_USER_AGENT:-}
      - DISABLE_PUBLIC_HISTORY=${DISABLE_PUBLIC_HISTORY:-false}
      # Authentication (v2.0+) - see MIGRATION.md before changing on an existing instance.
      - PULLMD_AUTH_MODE=${PULLMD_AUTH_MODE:-disabled}
      - PULLMD_ADMIN_EMAIL=${PULLMD_ADMIN_EMAIL:-}
      - PULLMD_ADMIN_PASSWORD=${PULLMD_ADMIN_PASSWORD:-}
      # Legacy bearer token (single-admin mode only, deprecated; removed in v3).
      - PULLMD_AUTH_TOKEN=${PULLMD_AUTH_TOKEN:-}
      # OAuth 2.1 for MCP clients (claude.ai web, Claude Desktop) — opt-in, see README.
      - OAUTH_JWT_SECRET=${OAUTH_JWT_SECRET:-}
      # Cache database lives on the ./data bind mount below.
      - CACHE_DB=/data/cache.db
    volumes:
      - ./data:/data
    networks:
      - proxy
      - pullmd-internal
    depends_on:
      - trafilatura
      - playwright
      - markitdown
    labels:
      - "traefik.enable=true"
      # Plain-HTTP router: matches the host and redirects every request to HTTPS.
      # The entrypoint names (`http`, `https`) must match the Traefik static config.
      - "traefik.http.routers.pullmd.entrypoints=http"
      # HOST_DOMAIN is mandatory: `:?` makes Compose abort with a clear message
      # instead of emitting an empty Host() rule when the variable is unset.
      - "traefik.http.routers.pullmd.rule=Host(`${HOST_DOMAIN:?HOST_DOMAIN is required}`)"
      - "traefik.http.middlewares.pullmd-https-redirect.redirectscheme.scheme=https"
      - "traefik.http.routers.pullmd.middlewares=pullmd-https-redirect"
      # HTTPS router: TLS enabled, forwards to the `pullmd` service on port 3000.
      - "traefik.http.routers.pullmd-secure.entrypoints=https"
      - "traefik.http.routers.pullmd-secure.rule=Host(`${HOST_DOMAIN:?HOST_DOMAIN is required}`)"
      - "traefik.http.routers.pullmd-secure.tls=true"
      - "traefik.http.routers.pullmd-secure.service=pullmd"
      - "traefik.http.services.pullmd.loadbalancer.server.port=3000"
      # The container sits on two networks; tell Traefik to use `proxy`.
      - "traefik.docker.network=proxy"

  # Extraction sidecar; pullmd calls it at http://trafilatura:8001/extract.
  trafilatura:
    image: aeternalabshq/pullmd-trafilatura:latest
    container_name: pullmd-trafilatura
    restart: unless-stopped
    networks:
      - pullmd-internal

  # Rendering sidecar; pullmd calls it at http://playwright:8002/render.
  playwright:
    image: aeternalabshq/pullmd-playwright:latest
    container_name: pullmd-playwright
    restart: unless-stopped
    networks:
      - pullmd-internal

  # Document-conversion sidecar; pullmd calls it at http://markitdown:8003/convert.
  markitdown:
    image: aeternalabshq/pullmd-markitdown:latest
    container_name: pullmd-markitdown
    restart: unless-stopped
    # Hard memory bound: a decompression bomb only OOM-kills the sandboxed
    # conversion child, the container stays under this cap and recovers.
    mem_limit: ${MARKITDOWN_MEM_LIMIT:-1g}
    environment:
      - MARKITDOWN_YT_TIMECODES=${MARKITDOWN_YT_TIMECODES:-links}
      - MARKITDOWN_YT_CHUNK=${MARKITDOWN_YT_CHUNK:-30}
      - MARKITDOWN_YT_LANGS=${MARKITDOWN_YT_LANGS:-}
      - MARKITDOWN_YT_PROXY=${MARKITDOWN_YT_PROXY:-}
      # Per-conversion wall-clock timeout (s); 0 disables. The conversion runs
      # in a disposable child process so a timeout/OOM can't take the server down.
      - MARKITDOWN_CONVERT_TIMEOUT=${MARKITDOWN_CONVERT_TIMEOUT:-60}
      # Optional per-conversion address-space cap (MB); 0 = rely on mem_limit.
      - MARKITDOWN_MEM_LIMIT_MB=${MARKITDOWN_MEM_LIMIT_MB:-0}
    networks:
      - pullmd-internal
networks:
  # Shared with the Traefik container. `external: true` means Compose will not
  # create it; it must already exist (e.g. `docker network create proxy`).
  proxy:
    external: true
  # Private bridge network connecting pullmd to its sidecar services.
  pullmd-internal:
    driver: bridge