From d75d0df88abe45205f2c50fc94d6844eb19cf50f Mon Sep 17 00:00:00 2001 From: Laurent Cozic Date: Wed, 25 Feb 2026 17:46:08 +0000 Subject: [PATCH] Chore: Refactor and simplify Transcribe server (#14462) --- .dockerignore | 1 + .env-transcribe-sample | 48 ++++++++--------- Dockerfile.transcribe | 13 +++-- docker-compose.transcribe.yml | 44 +++++++++++++++ packages/transcribe/README.md | 52 +++++++++--------- packages/transcribe/src/api/app.ts | 7 ++- .../src/api/handler/createJob.test.ts | 3 ++ .../transcribe/src/api/handler/createJob.ts | 3 +- .../src/api/utils/isFileAValidImage.test.ts | 29 ++++++++-- .../src/api/utils/isFileAValidImage.ts | 2 +- .../transcribe/src/api/utils/parseRequest.ts | 1 + packages/transcribe/src/env.ts | 54 +++++++++++++------ .../transcribe/src/services/createQueue.ts | 4 +- packages/transcribe/src/testUtils.ts | 11 ++-- 14 files changed, 181 insertions(+), 91 deletions(-) create mode 100644 docker-compose.transcribe.yml diff --git a/.dockerignore b/.dockerignore index 4c2a7cfd43..7e3c5df80c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,3 +17,4 @@ packages/server/db-*.sqlite packages/server/dist/ packages/server/logs/ packages/server/temp/ +packages/transcribe/.env diff --git a/.env-transcribe-sample b/.env-transcribe-sample index 5eb1e22da8..8a70f02ff7 100644 --- a/.env-transcribe-sample +++ b/.env-transcribe-sample @@ -1,35 +1,33 @@ +# Joplin Transcribe Configuration +# +# Copy this file to .env-transcribe and update the values. + # ============================================================================= # Required -# ----------------------------------------------------------------------------- # ============================================================================= -SERVER_PORT=4567 +# Set a secure API key for authentication +API_KEY=changeme -API_KEY=random-string -QUEUE_TTL=900000 -QUEUE_RETRY_COUNT=2 -QUEUE_MAINTENANCE_INTERVAL=30000 -IMAGE_MAX_DIMENSION=400 +# ============================================================================= +# Optional (defaults are set in the Docker image) +# ============================================================================= -HTR_CLI_DOCKER_IMAGE=joplin/htr-cli:latest -# Fullpath to images folder e.g.: -#HTR_CLI_IMAGES_FOLDER=/home/user/joplin/packages/transcribe/images -HTR_CLI_IMAGES_FOLDER= +# Server port (default: 4567) +# SERVER_PORT=4567 -QUEUE_DRIVER=pg +# Maximum image dimension for processing (default: 400) +# IMAGE_MAX_DIMENSION=400 + +# Queue driver: sqlite (default) or pg # QUEUE_DRIVER=sqlite -FILE_STORAGE_MAINTENANCE_INTERVAL=3600000 -FILE_STORAGE_TTL=604800000 # one week +# ============================================================================= +# PostgreSQL settings (only if QUEUE_DRIVER=pg) +# ============================================================================= -# ============================================================================= -# Queue driver -# ----------------------------------------------------------------------------- -# ============================================================================= -# -# QUEUE_DATABASE_NAME=./queue.sqlite3 -QUEUE_DATABASE_NAME=transcribe -QUEUE_DATABASE_USER=transcribe -QUEUE_DATABASE_PASSWORD=transcribe -QUEUE_DATABASE_PORT=5432 -QUEUE_DATABASE_HOST=localhost \ No newline at end of file +# QUEUE_DATABASE_NAME=transcribe +# QUEUE_DATABASE_USER=transcribe +# QUEUE_DATABASE_PASSWORD=transcribe +# QUEUE_DATABASE_PORT=5432 +# QUEUE_DATABASE_HOST=localhost diff --git a/Dockerfile.transcribe b/Dockerfile.transcribe index 6fc7e73476..b024a7382f 100644 --- a/Dockerfile.transcribe +++ b/Dockerfile.transcribe @@ -18,7 +18,7 @@ RUN wget -q https://github.com/ggml-org/llama.cpp/releases/download/b5449/llama- && chmod +x /opt/llama/build/bin/llama-mtmd-cli # Create non-root user for security -RUN groupadd -r transcribe && useradd -r -g transcribe transcribe +RUN groupadd -r transcribe && useradd -r -g transcribe -m transcribe WORKDIR /app @@ -43,17 +43,20 @@ RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds \ && yarn cache clean \ && rm -rf .yarn/berry -# Create images directory and set permissions -RUN mkdir -p /app/packages/transcribe/images \ - && chown -R transcribe:transcribe /app/packages/transcribe/images +# Create data directory and set permissions +RUN mkdir -p /data/images \ + && chown -R transcribe:transcribe /data WORKDIR /app/packages/transcribe # Switch to non-root user USER transcribe -# Set environment variable for embedded llama.cpp binary +# Set environment variables ENV HTR_CLI_BINARY_PATH=/opt/llama/build/bin/llama-mtmd-cli +ENV DATA_DIR=/data +ENV QUEUE_DRIVER=sqlite # Start the Node.js application +ENTRYPOINT ["/usr/bin/tini", "--"] CMD ["yarn", "start"] diff --git a/docker-compose.transcribe.yml b/docker-compose.transcribe.yml new file mode 100644 index 0000000000..60b40b31f7 --- /dev/null +++ b/docker-compose.transcribe.yml @@ -0,0 +1,44 @@ +# Standalone docker-compose for Joplin Transcribe +# +# Uses SQLite for the queue (no external database needed). +# Data is stored in a named volume for proper permissions. +# +# Usage: +# +# 1. Download models: +# mkdir -p ./data/models +# wget -O ./data/models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf +# wget -O ./data/models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf +# +# 2. Configure: +# cp .env-transcribe-sample .env +# # Edit .env and set API_KEY +# +# 3. Run: +# docker compose -f docker-compose.transcribe.yml up + +volumes: + transcribe-data: + +services: + transcribe: + image: joplin/transcribe:amd64-latest + ports: + - "4567:4567" + volumes: + - transcribe-data:/data + - ./data/models:/data/models:ro + restart: unless-stopped + # Security: limit resources to prevent runaway processes + deploy: + resources: + limits: + memory: 16G + cpus: '4' + # Security: read-only root filesystem + read_only: true + tmpfs: + - /tmp + - /home/transcribe/.cache + env_file: + - .env diff --git a/packages/transcribe/README.md b/packages/transcribe/README.md index 04ad243758..dcfc8b0f26 100644 --- a/packages/transcribe/README.md +++ b/packages/transcribe/README.md @@ -4,34 +4,34 @@ The transcribe server embeds the llama.cpp binary directly in the Docker image. The AI models must be downloaded separately and mounted as a volume. -### 1. Download the models - -Create a directory for the models and download them: +### 1. Create data directory and download models ```shell -mkdir -p ./data/transcribe-models -wget -O ./data/transcribe-models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf -wget -O ./data/transcribe-models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf +mkdir -p ./data/models +chmod 755 ./data +wget -O ./data/models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf +wget -O ./data/models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf ``` ### 2. Configure environment 1. Copy `.env-transcribe-sample` to your Docker configuration directory. 2. Rename it to `.env-transcribe`. +3. Set `API_KEY` to a secure value. ### 3. Run the server -The models directory on your host is mounted into the container at `/opt/models`. The `HTR_CLI_MODELS_FOLDER` environment variable refers to the path inside the container, not the host path. - ```shell -docker build -f ./Dockerfile.transcribe -t transcribe . -docker run --env-file .env-transcribe -p 4567:4567 \ - -v ./packages/transcribe/images:/app/packages/transcribe/images \ - -v ./data/transcribe-models:/opt/models:ro \ - -e HTR_CLI_MODELS_FOLDER=/opt/models \ - transcribe +docker run --rm --env-file .env-transcribe -p 4567:4567 \ + -v ./data:/data \ + joplin/transcribe:amd64-latest ``` +The container automatically creates the following inside `/data`: +- `images/` - uploaded images +- `models/` - AI models (you provide these) +- `queue.sqlite3` - job queue database + ## Using Docker Compose The minimal configuration is provided in `.env-sample` and `docker-compose.server.yml`. @@ -55,8 +55,6 @@ The transcribe container runs with these security measures: - **Resource limits**: Memory and CPU limits prevent runaway processes - **No Docker socket**: Unlike previous versions, no Docker socket mount is required ---- - # Development Setup ## Testing @@ -70,13 +68,6 @@ Run all tests with: yarn test-all ``` -## Database Setup - -The queue driver can be **SQLite** or **PostgreSQL**: - -* Set `QUEUE_DRIVER` to `sqlite` or `pg`. -* If using SQLite, `QUEUE_DATABASE_NAME` specifies the path to the database file. - ## Starting the Server From `packages/transcribe`, run: @@ -87,11 +78,18 @@ yarn start ### Environment variables +Required: +- `API_KEY`: Authentication key for API requests +- `DATA_DIR`: Base directory for all data (images, models, database) - `HTR_CLI_BINARY_PATH`: Path to the llama-mtmd-cli binary -- `HTR_CLI_MODELS_FOLDER`: Path to the models directory -- `HTR_CLI_IMAGES_FOLDER`: Path where uploaded images are stored ---- +Optional: +- `QUEUE_DRIVER`: `sqlite` (default in Docker) or `pg` for PostgreSQL + +The following paths are automatically derived from `DATA_DIR`: +- `$DATA_DIR/images` - uploaded images +- `$DATA_DIR/models` - AI models +- `$DATA_DIR/queue.sqlite3` - SQLite database (when using sqlite driver) # API Endpoints @@ -124,8 +122,6 @@ curl --request POST \ --form file=@/home/js/Pictures/2025-07-24_17-42_1.png ``` ---- - ## GET `/transcribe/{jobId}` Fetches the result of a transcription job created with `POST /transcribe`. diff --git a/packages/transcribe/src/api/app.ts b/packages/transcribe/src/api/app.ts index 3ba3c2a4b6..f71c157846 100644 --- a/packages/transcribe/src/api/app.ts +++ b/packages/transcribe/src/api/app.ts @@ -6,7 +6,7 @@ import initiateLogger from '../services/initiateLogger'; import createQueue from '../services/createQueue'; import FileStorage from '../services/FileStorage'; import router from './router'; -import env, { EnvVariables } from '../env'; +import env, { ComputedEnvVariables } from '../env'; import HtrCli from '../core/HtrCli'; import JobProcessor from '../workers/JobProcessor'; @@ -47,11 +47,10 @@ const init = async (logger: LoggerWrapper) => { logger.info('Server started successfully'); }; -const checkServerConfigurations = (envVariables: EnvVariables) => { +const checkServerConfigurations = (envVariables: ComputedEnvVariables) => { if (!envVariables.API_KEY) throw Error('API_KEY environment variable not set.'); - if (!envVariables.HTR_CLI_IMAGES_FOLDER) throw Error('HTR_CLI_IMAGES_FOLDER environment variable not set. This should point to a folder where images will be stored.'); + if (!envVariables.DATA_DIR) throw Error('DATA_DIR environment variable not set. This should point to a folder where data will be stored.'); if (!envVariables.HTR_CLI_BINARY_PATH) throw Error('HTR_CLI_BINARY_PATH environment variable not set. This should point to the llama-mtmd-cli binary.'); - if (!envVariables.HTR_CLI_MODELS_FOLDER) throw Error('HTR_CLI_MODELS_FOLDER environment variable not set. This should point to the folder containing the AI models.'); }; const main = async () => { diff --git a/packages/transcribe/src/api/handler/createJob.test.ts b/packages/transcribe/src/api/handler/createJob.test.ts index e2799b5dd8..9ad54d9f3e 100644 --- a/packages/transcribe/src/api/handler/createJob.test.ts +++ b/packages/transcribe/src/api/handler/createJob.test.ts @@ -39,6 +39,7 @@ describe('createJob', () => { sendToQueue: (data: JobData) => queue.send(data), imageMaxDimension: 400, randomName: 'test_file_resized-1', + imagesFolder: './images', }; const result = await createJob(requirements); const job = await queue.fetch(); @@ -58,6 +59,7 @@ describe('createJob', () => { sendToQueue: (data: JobData) => queue.send(data), imageMaxDimension: 400, randomName: 'test_file_resized-2', + imagesFolder: './images', }; expect(async () => createJob(requirements)).rejects.toThrow(); @@ -78,6 +80,7 @@ describe('createJob', () => { sendToQueue: (data: JobData) => queue.send(data), imageMaxDimension: 400, randomName: 'test_file_resized-3', + imagesFolder: './images', }; await createJob(requirements); diff --git a/packages/transcribe/src/api/handler/createJob.ts b/packages/transcribe/src/api/handler/createJob.ts index 714fe288eb..b6d4c510f1 100644 --- a/packages/transcribe/src/api/handler/createJob.ts +++ b/packages/transcribe/src/api/handler/createJob.ts @@ -11,10 +11,11 @@ type CreateJobContext = { filepath: string; imageMaxDimension: number; randomName: string; + imagesFolder: string; }; const createJob = async (context: CreateJobContext) => { - const imageResizedPath = join('images', context.randomName); + const imageResizedPath = join(context.imagesFolder, context.randomName); await resizeImageAndDeleteInput(context.filepath, imageResizedPath, context.imageMaxDimension); diff --git a/packages/transcribe/src/api/utils/isFileAValidImage.test.ts b/packages/transcribe/src/api/utils/isFileAValidImage.test.ts index 63ae6e02c3..8c0df370d3 100644 --- a/packages/transcribe/src/api/utils/isFileAValidImage.test.ts +++ b/packages/transcribe/src/api/utils/isFileAValidImage.test.ts @@ -1,23 +1,44 @@ import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage'; +// Map MIME types to file extensions (for filenames) and detected extensions (from file-type library) +const mimeToFileExt: Record = { + 'image/png': 'png', + 'image/jpeg': 'jpeg', + 'image/bmp': 'bmp', + 'application/zip': 'zip', + 'application/pdf': 'pdf', +}; + +const mimeToDetectedExt: Record = { + 'image/png': 'png', + 'image/jpeg': 'jpg', + 'image/bmp': 'bmp', + 'application/zip': 'zip', + 'application/pdf': 'pdf', +}; + describe('isFileAValidImage', () => { it.each( supportedImageFormat, )('should be valid if the format is supported: %s', async (format: string) => { - const fileName = `sample.${format.split('/')[1]}`; + const fileExt = mimeToFileExt[format]; + const detectedExt = mimeToDetectedExt[format]; + const fileName = `sample.${fileExt}`; const fullFilePath = `./test-cases/${fileName}`; const [isValid, fileFormat] = await isFileAValidImage(fullFilePath); expect(isValid).toBe(true); - expect(fileFormat).toBe(format); + expect(fileFormat).toBe(detectedExt); }); it.each(['application/zip', 'application/pdf'])('should not be valid if the format is not supported: %s', async (format: string) => { - const fileName = `sample.${format.split('/')[1]}`; + const fileExt = mimeToFileExt[format]; + const detectedExt = mimeToDetectedExt[format]; + const fileName = `sample.${fileExt}`; const fullFilePath = `./test-cases/${fileName}`; const [isValid, fileFormat] = await isFileAValidImage(fullFilePath); expect(isValid).toBe(false); - expect(fileFormat).toBe(format); + expect(fileFormat).toBe(detectedExt); }); it('should throw an error if it is not possible to determine the type of the file', async () => { diff --git a/packages/transcribe/src/api/utils/isFileAValidImage.ts b/packages/transcribe/src/api/utils/isFileAValidImage.ts index c0dd49e048..41714645d9 100644 --- a/packages/transcribe/src/api/utils/isFileAValidImage.ts +++ b/packages/transcribe/src/api/utils/isFileAValidImage.ts @@ -10,7 +10,7 @@ const isFileAValidImage = async (filepath: string): Promise<[boolean, string]> = } const isValid = supportedImageFormat.includes(result.mime); - return [isValid, result.mime]; + return [isValid, result.ext]; }; export default isFileAValidImage; diff --git a/packages/transcribe/src/api/utils/parseRequest.ts b/packages/transcribe/src/api/utils/parseRequest.ts index 2b609a6acc..311045a3a2 100644 --- a/packages/transcribe/src/api/utils/parseRequest.ts +++ b/packages/transcribe/src/api/utils/parseRequest.ts @@ -29,6 +29,7 @@ export const parseCreateJobRequest = async (ctx: AppContext) => { filepath: file.filepath, imageMaxDimension: env().IMAGE_MAX_DIMENSION, randomName: createFilename(formatProvided), + imagesFolder: env().HTR_CLI_IMAGES_FOLDER, }; }; diff --git a/packages/transcribe/src/env.ts b/packages/transcribe/src/env.ts index c17c1b5926..d9608691a1 100644 --- a/packages/transcribe/src/env.ts +++ b/packages/transcribe/src/env.ts @@ -6,12 +6,10 @@ export const defaultEnvValues: EnvVariables = { QUEUE_TTL: 15 * Minute, QUEUE_RETRY_COUNT: 2, QUEUE_MAINTENANCE_INTERVAL: 60 * Second, - HTR_CLI_IMAGES_FOLDER: '', + DATA_DIR: '', HTR_CLI_BINARY_PATH: '', - HTR_CLI_MODELS_FOLDER: '', QUEUE_DRIVER: 'pg', // 'sqlite' QUEUE_DATABASE_PASSWORD: '', - QUEUE_DATABASE_NAME: '', QUEUE_DATABASE_USER: '', QUEUE_DATABASE_PORT: 5432, FILE_STORAGE_MAINTENANCE_INTERVAL: 1 * Hour, @@ -26,12 +24,10 @@ export interface EnvVariables { QUEUE_TTL: number; QUEUE_RETRY_COUNT: number; QUEUE_MAINTENANCE_INTERVAL: number; - HTR_CLI_IMAGES_FOLDER: string; + DATA_DIR: string; HTR_CLI_BINARY_PATH: string; - HTR_CLI_MODELS_FOLDER: string; QUEUE_DRIVER: string; QUEUE_DATABASE_PASSWORD: string; - QUEUE_DATABASE_NAME: string; QUEUE_DATABASE_USER: string; QUEUE_DATABASE_PORT: number; FILE_STORAGE_MAINTENANCE_INTERVAL: number; @@ -40,7 +36,13 @@ export interface EnvVariables { IMAGE_MAX_DIMENSION: number; } -export function parseEnv(rawEnv: Record): EnvVariables { +export interface ComputedEnvVariables extends EnvVariables { + HTR_CLI_IMAGES_FOLDER: string; + HTR_CLI_MODELS_FOLDER: string; + QUEUE_DATABASE_NAME: string; +} + +export function parseEnv(rawEnv: Record): ComputedEnvVariables { const output: EnvVariables = { ...defaultEnvValues, }; @@ -48,7 +50,7 @@ export function parseEnv(rawEnv: Record): EnvVariabl for (const [key, value] of Object.entries(defaultEnvValues)) { const rawEnvValue = rawEnv[key]; - if (rawEnvValue === undefined) continue; + if (rawEnvValue === undefined || rawEnvValue === '') continue; const typedKey = key as keyof EnvVariables; @@ -63,19 +65,37 @@ export function parseEnv(rawEnv: Record): EnvVariabl } } - return output; + // Derive paths from DATA_DIR + let queueDatabaseName: string; + if (output.QUEUE_DRIVER === 'sqlite') { + queueDatabaseName = `${output.DATA_DIR}/queue.sqlite3`; + } else { + // For PostgreSQL, use env var or default to 'transcribe' + queueDatabaseName = rawEnv['QUEUE_DATABASE_NAME'] || 'transcribe'; + } + + const computed: ComputedEnvVariables = { + ...output, + HTR_CLI_IMAGES_FOLDER: `${output.DATA_DIR}/images`, + HTR_CLI_MODELS_FOLDER: `${output.DATA_DIR}/models`, + QUEUE_DATABASE_NAME: queueDatabaseName, + }; + + return computed; } // Should always be called after require('dotenv').config() -const env = () => { - return parseEnv( - Object.keys(defaultEnvValues) - .reduce((env: Record, key) => { - env[key] = process.env[key]; - return env; - }, {}), - ); +const env = (): ComputedEnvVariables => { + const rawEnv = Object.keys(defaultEnvValues) + .reduce((env: Record, key) => { + env[key] = process.env[key]; + return env; + }, {} as Record); + // Also include QUEUE_DATABASE_NAME for PostgreSQL driver + rawEnv['QUEUE_DATABASE_NAME'] = process.env['QUEUE_DATABASE_NAME']; + + return parseEnv(rawEnv); }; export default env; diff --git a/packages/transcribe/src/services/createQueue.ts b/packages/transcribe/src/services/createQueue.ts index 545966cf5a..1688887c73 100644 --- a/packages/transcribe/src/services/createQueue.ts +++ b/packages/transcribe/src/services/createQueue.ts @@ -1,11 +1,11 @@ import Logger from '@joplin/utils/Logger'; import PgBossQueue from './queue/PgBossQueue'; import SqliteQueue from './queue/SqliteQueue'; -import { EnvVariables } from '../env'; +import { ComputedEnvVariables } from '../env'; const logger = Logger.create('createQueue'); -const createQueue = async (envVariables: EnvVariables, isPrimary: boolean) => { +const createQueue = async (envVariables: ComputedEnvVariables, isPrimary: boolean) => { logger.info('Choosing queue'); if (envVariables.QUEUE_DRIVER === 'pg') { diff --git a/packages/transcribe/src/testUtils.ts b/packages/transcribe/src/testUtils.ts index ab5a7f796c..40e3993ad4 100644 --- a/packages/transcribe/src/testUtils.ts +++ b/packages/transcribe/src/testUtils.ts @@ -1,12 +1,15 @@ import { remove } from 'fs-extra'; import createQueue from './services/createQueue'; -import env from './env'; +import env, { ComputedEnvVariables } from './env'; export const initDb = async (sqliteFile: string) => { const envVariables = env(); - envVariables.QUEUE_DRIVER = 'sqlite'; - envVariables.QUEUE_DATABASE_NAME = sqliteFile; - const queue = await createQueue(envVariables, true); + const testEnv: ComputedEnvVariables = { + ...envVariables, + QUEUE_DRIVER: 'sqlite', + QUEUE_DATABASE_NAME: sqliteFile, + }; + const queue = await createQueue(testEnv, true); return queue; };