mirror of
https://github.com/docling-project/docling-serve.git
synced 2026-05-17 13:10:40 +00:00
453db676ee
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
243 lines
10 KiB
Bash
243 lines
10 KiB
Bash
# Active (uncommented) settings. Everything further down in this file is
# commented out and only takes effect once the leading "# " is removed.
#
# Directory holding the Tesseract "tessdata" files (presumably read by the
# Tesseract OCR engine - confirm against the deployment image).
TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
|
|
# Presumably the number of uvicorn worker processes - TODO confirm.
UVICORN_WORKERS=2
|
|
# NOTE(review): uvicorn documents its workers option as not valid together
# with reload; confirm whether both settings are meant to be active at once.
UVICORN_RELOAD=True
|
|
|
|
# Logging configuration (case-insensitive)
|
|
# Options: WARNING, INFO, DEBUG (or warning, info, debug)
# DOCLING_SERVE_LOG_LEVEL=WARNING
|
|
|
|
# RQ Engine Redis Connection Pool Configuration
|
|
# Adjust these values based on your deployment scale:
|
|
# - Small (1-4 workers): Use defaults (50 connections, no timeouts)
|
|
# - Medium (5-10 workers): Set max_connections to 100
|
|
# - Large (10+ workers): Set max_connections to 150-200
|
|
# - Timeout settings: Only set if experiencing connection issues
|
|
# DOCLING_SERVE_ENG_RQ_REDIS_MAX_CONNECTIONS=50
|
|
# DOCLING_SERVE_ENG_RQ_REDIS_SOCKET_TIMEOUT=5.0
|
|
# DOCLING_SERVE_ENG_RQ_REDIS_SOCKET_CONNECT_TIMEOUT=5.0
|
|
|
|
# ============================================================================
|
|
|
|
# Configuration File Support
|
|
# ============================================================================
|
|
# Load configuration from a YAML or JSON file
|
|
# Environment variables take precedence over config file values
|
|
# DOCLING_SERVE_CONFIG_FILE=examples/config.yaml
|
|
|
|
# ============================================================================
|
|
# VLM Pipeline Control
|
|
# ============================================================================
|
|
# Default VLM preset to use when user specifies "default"
|
|
# DOCLING_SERVE_DEFAULT_VLM_PRESET=granite_docling
|
|
|
|
# List of allowed VLM preset IDs (JSON array or comma-separated)
|
|
# JSON format:
|
|
# DOCLING_SERVE_ALLOWED_VLM_PRESETS='["granite_docling", "custom_preset"]'
|
|
# Comma-separated format:
|
|
# DOCLING_SERVE_ALLOWED_VLM_PRESETS=granite_docling,custom_preset
|
|
|
|
# Custom VLM presets (JSON object)
|
|
# DOCLING_SERVE_CUSTOM_VLM_PRESETS='{"my_preset": {"engine": "openai", "model": "gpt-4-vision"}}'
|
|
|
|
# List of allowed VLM engine types (JSON array or comma-separated)
|
|
# DOCLING_SERVE_ALLOWED_VLM_ENGINES='["openai", "anthropic"]'
|
|
# DOCLING_SERVE_ALLOWED_VLM_ENGINES=openai,anthropic
|
|
|
|
# Whether users can specify custom VLM engine configurations
|
|
# DOCLING_SERVE_ALLOW_CUSTOM_VLM_CONFIG=false
|
|
|
|
# ============================================================================
|
|
# Picture Description Control
|
|
# ============================================================================
|
|
# DOCLING_SERVE_DEFAULT_PICTURE_DESCRIPTION_PRESET=smolvlm
|
|
# DOCLING_SERVE_ALLOWED_PICTURE_DESCRIPTION_PRESETS='["smolvlm", "custom"]'
|
|
# DOCLING_SERVE_CUSTOM_PICTURE_DESCRIPTION_PRESETS='{}'
|
|
# DOCLING_SERVE_ALLOWED_PICTURE_DESCRIPTION_ENGINES='["smolvlm"]'
|
|
# DOCLING_SERVE_ALLOW_CUSTOM_PICTURE_DESCRIPTION_CONFIG=false
|
|
|
|
# ============================================================================
|
|
# Code/Formula Control
|
|
# ============================================================================
|
|
# DOCLING_SERVE_DEFAULT_CODE_FORMULA_PRESET=default
|
|
# DOCLING_SERVE_ALLOWED_CODE_FORMULA_PRESETS='["default"]'
|
|
# DOCLING_SERVE_CUSTOM_CODE_FORMULA_PRESETS='{}'
|
|
# DOCLING_SERVE_ALLOWED_CODE_FORMULA_ENGINES='["default"]'
|
|
# DOCLING_SERVE_ALLOW_CUSTOM_CODE_FORMULA_CONFIG=false
|
|
|
|
# ============================================================================
|
|
# Table Structure Control
|
|
# ============================================================================
|
|
# DOCLING_SERVE_DEFAULT_TABLE_STRUCTURE_KIND=docling_tableformer
|
|
# DOCLING_SERVE_ALLOWED_TABLE_STRUCTURE_KINDS='["docling_tableformer", "approved_plugin"]'
|
|
# DOCLING_SERVE_ALLOWED_TABLE_STRUCTURE_KINDS=docling_tableformer,approved_plugin
|
|
|
|
# ============================================================================
|
|
# Layout Control
|
|
# ============================================================================
|
|
# DOCLING_SERVE_DEFAULT_LAYOUT_KIND=docling_layout_default
|
|
# DOCLING_SERVE_ALLOWED_LAYOUT_KINDS='["docling_layout_default", "layout_object_detection"]'
|
|
# DOCLING_SERVE_ALLOWED_LAYOUT_KINDS=docling_layout_default,layout_object_detection
|
|
|
|
# ============================================================================
# Ray Engine Configuration
|
|
# ============================================================================
|
|
# The Ray engine uses Ray Serve for autoscaling document processing with
|
|
# Redis for state management and fair task scheduling across tenants.
|
|
#
|
|
# To use: Set DOCLING_SERVE_ENG_KIND=ray
|
|
#
|
|
# Key Features:
|
|
# - Fair round-robin scheduling at task level
|
|
# - Per-tenant task queues and resource limits
|
|
# - Ray Serve autoscaling with persistent converters
|
|
# - Redis for HA state management (supports Sentinel/Cluster)
|
|
# - Fault tolerance with automatic retries
|
|
# ============================================================================
|
|
|
|
# --- Required Settings ---
|
|
# Redis URL for state management (REQUIRED)
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_URL=redis://localhost:6379/
|
|
|
|
# Ray cluster address (REQUIRED)
|
|
# Use "auto" or "local" to start local Ray cluster
|
|
# Or provide Ray cluster address like "ray://host:10001"
|
|
# DOCLING_SERVE_ENG_RAY_ADDRESS=auto
|
|
|
|
# --- Redis Configuration ---
|
|
# Connection pool and timeout settings
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_MAX_CONNECTIONS=50
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_SOCKET_TIMEOUT=5.0
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_SOCKET_CONNECT_TIMEOUT=5.0
|
|
|
|
# --- Result Storage ---
|
|
# How long to keep task results in Redis (seconds)
|
|
# 14400 seconds = 4 hours
# DOCLING_SERVE_ENG_RAY_RESULTS_TTL=14400
|
|
# DOCLING_SERVE_ENG_RAY_RESULTS_PREFIX=docling:ray:results
|
|
|
|
# --- Pub/Sub ---
|
|
# Redis channel for task status updates
|
|
# DOCLING_SERVE_ENG_RAY_SUB_CHANNEL=docling:ray:updates
|
|
|
|
# --- Fair Dispatcher ---
|
|
# How often to check for new tasks (seconds)
|
|
# DOCLING_SERVE_ENG_RAY_DISPATCHER_INTERVAL=2.0
|
|
|
|
# --- Per-Tenant Limits ---
|
|
# Maximum tasks being processed simultaneously per tenant
|
|
# DOCLING_SERVE_ENG_RAY_DEFAULT_MAX_CONCURRENT_TASKS=5
|
|
# Maximum tasks in queue per tenant (None = unlimited)
|
|
# DOCLING_SERVE_ENG_RAY_DEFAULT_MAX_QUEUED_TASKS=
|
|
# Return 429 if queue limit exceeded (requires DOCLING_SERVE_ENG_RAY_DEFAULT_MAX_QUEUED_TASKS to be set)
|
|
# DOCLING_SERVE_ENG_RAY_ENABLE_QUEUE_LIMIT_REJECTION=false
|
|
# Maximum documents being processed per tenant (None = unlimited)
|
|
# DOCLING_SERVE_ENG_RAY_DEFAULT_MAX_DOCUMENTS=
|
|
# Enable per-tenant document limits
|
|
# DOCLING_SERVE_ENG_RAY_ENABLE_DOCUMENT_LIMITS=false
|
|
|
|
# --- Ray Configuration ---
|
|
# Ray namespace for isolation
|
|
# DOCLING_SERVE_ENG_RAY_NAMESPACE=docling
|
|
# Ray runtime environment (JSON dict)
|
|
# DOCLING_SERVE_ENG_RAY_RUNTIME_ENV=
|
|
|
|
# --- Ray Serve Autoscaling ---
|
|
# Minimum number of Ray Serve replicas
|
|
# DOCLING_SERVE_ENG_RAY_MIN_WORKERS=1
|
|
# Maximum number of Ray Serve replicas
|
|
# DOCLING_SERVE_ENG_RAY_MAX_WORKERS=10
|
|
# Target concurrent requests per replica for autoscaling
|
|
# DOCLING_SERVE_ENG_RAY_TARGET_REQUESTS_PER_REPLICA=1
|
|
# Seconds to wait before scaling up (prevents flapping)
|
|
# DOCLING_SERVE_ENG_RAY_UPSCALE_DELAY_S=30.0
|
|
# Seconds to wait before scaling down
|
|
# DOCLING_SERVE_ENG_RAY_DOWNSCALE_DELAY_S=600.0
|
|
# CPUs to allocate per Ray Serve replica
|
|
# DOCLING_SERVE_ENG_RAY_NUM_CPUS_PER_ACTOR=1.0
|
|
|
|
# --- Fault Tolerance & Retry ---
|
|
# Maximum retries for failed tasks
|
|
# DOCLING_SERVE_ENG_RAY_MAX_TASK_RETRIES=3
|
|
# Seconds to wait between task retries
|
|
# DOCLING_SERVE_ENG_RAY_RETRY_DELAY=5.0
|
|
# Maximum retries per document within a task
|
|
# DOCLING_SERVE_ENG_RAY_MAX_DOCUMENT_RETRIES=2
|
|
|
|
# --- Ray Actor Configuration ---
|
|
# Max dispatcher actor restarts (-1 = unlimited for HA)
|
|
# DOCLING_SERVE_ENG_RAY_DISPATCHER_MAX_RESTARTS=-1
|
|
# Ray-level task retries for dispatcher operations
|
|
# DOCLING_SERVE_ENG_RAY_DISPATCHER_MAX_TASK_RETRIES=3
|
|
|
|
# --- Timeouts ---
|
|
# Maximum seconds per task (None = no limit)
|
|
# DOCLING_SERVE_ENG_RAY_TASK_TIMEOUT=3600.0
|
|
# Maximum seconds per document (None = no limit)
|
|
# DOCLING_SERVE_ENG_RAY_DOCUMENT_TIMEOUT=300.0
|
|
# Timeout for Redis operations in seconds
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_OPERATION_TIMEOUT=30.0
|
|
|
|
# --- Health Checks ---
|
|
# Enable dispatcher heartbeat monitoring
|
|
# DOCLING_SERVE_ENG_RAY_ENABLE_HEARTBEAT=true
|
|
|
|
# --- Resource Management & Memory Monitoring ---
|
|
# Memory limit per Ray actor (e.g., "4GB")
|
|
# DOCLING_SERVE_ENG_RAY_MEMORY_LIMIT_PER_ACTOR=
|
|
# Ray object store memory (e.g., "10GB")
|
|
# DOCLING_SERVE_ENG_RAY_OBJECT_STORE_MEMORY=
|
|
# Enable out-of-memory detection and recovery
|
|
# DOCLING_SERVE_ENG_RAY_ENABLE_OOM_PROTECTION=true
|
|
# Memory usage threshold for warnings (0.0-1.0)
|
|
# DOCLING_SERVE_ENG_RAY_MEMORY_WARNING_THRESHOLD=0.9
|
|
|
|
# --- Scratch Directory ---
|
|
# Directory for temporary files during processing
|
|
# DOCLING_SERVE_ENG_RAY_SCRATCH_DIR=
|
|
|
|
# --- Logging ---
|
|
# Logging level for Ray orchestrator
|
|
# DOCLING_SERVE_ENG_RAY_LOG_LEVEL=INFO
|
|
|
|
# --- Tenant Identification ---
|
|
# Header name for tenant identification (for fair scheduling)
|
|
# DOCLING_SERVE_ENG_RAY_TENANT_ID_HEADER=X-Tenant-Id
|
|
|
|
# ============================================================================
|
|
# Example Configurations
|
|
# ============================================================================
|
|
|
|
# Small deployment (1-2 workers) with local Ray:
|
|
# DOCLING_SERVE_ENG_KIND=ray
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_URL=redis://localhost:6379/
|
|
# DOCLING_SERVE_ENG_RAY_ADDRESS=auto
|
|
# DOCLING_SERVE_ENG_RAY_MIN_WORKERS=1
|
|
# DOCLING_SERVE_ENG_RAY_MAX_WORKERS=2
|
|
|
|
# Medium deployment (5-10 workers) with existing Ray cluster:
|
|
# DOCLING_SERVE_ENG_KIND=ray
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_URL=redis://redis-cluster:6379/
|
|
# DOCLING_SERVE_ENG_RAY_ADDRESS=ray://ray-head:10001
|
|
# DOCLING_SERVE_ENG_RAY_MIN_WORKERS=2
|
|
# DOCLING_SERVE_ENG_RAY_MAX_WORKERS=10
|
|
# DOCLING_SERVE_ENG_RAY_DEFAULT_MAX_QUEUED_TASKS=50
|
|
# DOCLING_SERVE_ENG_RAY_ENABLE_QUEUE_LIMIT_REJECTION=true
|
|
|
|
# Large deployment (10+ workers) with Redis Sentinel and Ray cluster:
|
|
# DOCLING_SERVE_ENG_KIND=ray
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_URL=redis+sentinel://sentinel-host:26379/mymaster/0
|
|
# DOCLING_SERVE_ENG_RAY_ADDRESS=ray://ray-head:10001
|
|
# DOCLING_SERVE_ENG_RAY_MIN_WORKERS=5
|
|
# DOCLING_SERVE_ENG_RAY_MAX_WORKERS=20
|
|
# DOCLING_SERVE_ENG_RAY_REDIS_MAX_CONNECTIONS=150
|
|
|
|
# ============================================================================
|
|
# Usage with Tenant Identification
|
|
# ============================================================================
|
|
# When using the ray engine, you can identify tenants via HTTP header:
|
|
#
|
|
# curl -X POST "http://localhost:5001/v1/convert/source" \
|
|
# -H "X-Tenant-Id: alice@example.com" \
|
|
# -H "Content-Type: application/json" \
|
|
# -d '{"sources": [{"url": "https://example.com/doc.pdf"}]}'
|
|
#
|
|
# Without the header, all requests are treated as the "default" tenant.
|
|
# Fair scheduling ensures tasks from different tenants are processed fairly.
|