Chore: Refactor and simplify Transcribe server (#14462)

This commit is contained in:
Laurent Cozic
2026-02-25 17:46:08 +00:00
committed by GitHub
parent 5d9a6151ea
commit d75d0df88a
14 changed files with 181 additions and 91 deletions
+1
View File
@@ -17,3 +17,4 @@ packages/server/db-*.sqlite
packages/server/dist/
packages/server/logs/
packages/server/temp/
packages/transcribe/.env
+23 -25
View File
@@ -1,35 +1,33 @@
# Joplin Transcribe Configuration
#
# Copy this file to .env-transcribe and update the values.
# =============================================================================
# Required
# -----------------------------------------------------------------------------
# =============================================================================
SERVER_PORT=4567
# Set a secure API key for authentication
API_KEY=changeme
API_KEY=random-string
QUEUE_TTL=900000
QUEUE_RETRY_COUNT=2
QUEUE_MAINTENANCE_INTERVAL=30000
IMAGE_MAX_DIMENSION=400
# =============================================================================
# Optional (defaults are set in the Docker image)
# =============================================================================
HTR_CLI_DOCKER_IMAGE=joplin/htr-cli:latest
# Full path to the images folder, e.g.:
#HTR_CLI_IMAGES_FOLDER=/home/user/joplin/packages/transcribe/images
HTR_CLI_IMAGES_FOLDER=
# Server port (default: 4567)
# SERVER_PORT=4567
QUEUE_DRIVER=pg
# Maximum image dimension for processing (default: 400)
# IMAGE_MAX_DIMENSION=400
# Queue driver: sqlite (default) or pg
# QUEUE_DRIVER=sqlite
FILE_STORAGE_MAINTENANCE_INTERVAL=3600000
FILE_STORAGE_TTL=604800000 # one week
# =============================================================================
# PostgreSQL settings (only if QUEUE_DRIVER=pg)
# =============================================================================
# =============================================================================
# Queue driver
# -----------------------------------------------------------------------------
# =============================================================================
#
# QUEUE_DATABASE_NAME=./queue.sqlite3
QUEUE_DATABASE_NAME=transcribe
QUEUE_DATABASE_USER=transcribe
QUEUE_DATABASE_PASSWORD=transcribe
QUEUE_DATABASE_PORT=5432
QUEUE_DATABASE_HOST=localhost
# QUEUE_DATABASE_NAME=transcribe
# QUEUE_DATABASE_USER=transcribe
# QUEUE_DATABASE_PASSWORD=transcribe
# QUEUE_DATABASE_PORT=5432
# QUEUE_DATABASE_HOST=localhost
+8 -5
View File
@@ -18,7 +18,7 @@ RUN wget -q https://github.com/ggml-org/llama.cpp/releases/download/b5449/llama-
&& chmod +x /opt/llama/build/bin/llama-mtmd-cli
# Create non-root user for security
RUN groupadd -r transcribe && useradd -r -g transcribe transcribe
RUN groupadd -r transcribe && useradd -r -g transcribe -m transcribe
WORKDIR /app
@@ -43,17 +43,20 @@ RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds \
&& yarn cache clean \
&& rm -rf .yarn/berry
# Create images directory and set permissions
RUN mkdir -p /app/packages/transcribe/images \
&& chown -R transcribe:transcribe /app/packages/transcribe/images
# Create data directory and set permissions
RUN mkdir -p /data/images \
&& chown -R transcribe:transcribe /data
WORKDIR /app/packages/transcribe
# Switch to non-root user
USER transcribe
# Set environment variable for embedded llama.cpp binary
# Set environment variables
ENV HTR_CLI_BINARY_PATH=/opt/llama/build/bin/llama-mtmd-cli
ENV DATA_DIR=/data
ENV QUEUE_DRIVER=sqlite
# Start the Node.js application
ENTRYPOINT ["/usr/bin/tini", "--"]
CMD ["yarn", "start"]
+44
View File
@@ -0,0 +1,44 @@
# Standalone docker-compose for Joplin Transcribe
#
# Uses SQLite for the queue (no external database needed).
# Data is stored in a named volume for proper permissions.
#
# Usage:
#
# 1. Download models:
# mkdir -p ./data/models
# wget -O ./data/models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
# wget -O ./data/models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
#
# 2. Configure:
# cp .env-transcribe-sample .env
# # Edit .env and set API_KEY
#
# 3. Run:
# docker compose -f docker-compose.transcribe.yml up
volumes:
transcribe-data:
services:
transcribe:
image: joplin/transcribe:amd64-latest
ports:
- "4567:4567"
volumes:
- transcribe-data:/data
- ./data/models:/data/models:ro
restart: unless-stopped
# Security: limit resources to prevent runaway processes
deploy:
resources:
limits:
memory: 16G
cpus: '4'
# Security: read-only root filesystem
read_only: true
tmpfs:
- /tmp
- /home/transcribe/.cache
env_file:
- .env
+24 -28
View File
@@ -4,34 +4,34 @@
The transcribe server embeds the llama.cpp binary directly in the Docker image. The AI models must be downloaded separately and mounted as a volume.
### 1. Download the models
Create a directory for the models and download them:
### 1. Create data directory and download models
```shell
mkdir -p ./data/transcribe-models
wget -O ./data/transcribe-models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
wget -O ./data/transcribe-models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
mkdir -p ./data/models
chmod 755 ./data
wget -O ./data/models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
wget -O ./data/models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
```
### 2. Configure environment
1. Copy `.env-transcribe-sample` to your Docker configuration directory.
2. Rename it to `.env-transcribe`.
3. Set `API_KEY` to a secure value.
### 3. Run the server
The models directory on your host is mounted into the container at `/opt/models`. The `HTR_CLI_MODELS_FOLDER` environment variable refers to the path inside the container, not the host path.
```shell
docker build -f ./Dockerfile.transcribe -t transcribe .
docker run --env-file .env-transcribe -p 4567:4567 \
-v ./packages/transcribe/images:/app/packages/transcribe/images \
-v ./data/transcribe-models:/opt/models:ro \
-e HTR_CLI_MODELS_FOLDER=/opt/models \
transcribe
docker run --rm --env-file .env-transcribe -p 4567:4567 \
-v ./data:/data \
joplin/transcribe:amd64-latest
```
The container uses the following paths inside `/data`:
- `images/` - uploaded images
- `models/` - AI models (you provide these)
- `queue.sqlite3` - job queue database
## Using Docker Compose
The minimal configuration is provided in `.env-sample` and `docker-compose.server.yml`.
@@ -55,8 +55,6 @@ The transcribe container runs with these security measures:
- **Resource limits**: Memory and CPU limits prevent runaway processes
- **No Docker socket**: Unlike previous versions, no Docker socket mount is required
---
# Development Setup
## Testing
@@ -70,13 +68,6 @@ Run all tests with:
yarn test-all
```
## Database Setup
The queue driver can be **SQLite** or **PostgreSQL**:
* Set `QUEUE_DRIVER` to `sqlite` or `pg`.
* If using SQLite, `QUEUE_DATABASE_NAME` specifies the path to the database file.
## Starting the Server
From `packages/transcribe`, run:
@@ -87,11 +78,18 @@ yarn start
### Environment variables
Required:
- `API_KEY`: Authentication key for API requests
- `DATA_DIR`: Base directory for all data (images, models, database)
- `HTR_CLI_BINARY_PATH`: Path to the llama-mtmd-cli binary
- `HTR_CLI_MODELS_FOLDER`: Path to the models directory
- `HTR_CLI_IMAGES_FOLDER`: Path where uploaded images are stored
---
Optional:
- `QUEUE_DRIVER`: `sqlite` (default in Docker) or `pg` for PostgreSQL
The following paths are automatically derived from `DATA_DIR`:
- `$DATA_DIR/images` - uploaded images
- `$DATA_DIR/models` - AI models
- `$DATA_DIR/queue.sqlite3` - SQLite database (when using sqlite driver)
# API Endpoints
@@ -124,8 +122,6 @@ curl --request POST \
--form file=@/home/js/Pictures/2025-07-24_17-42_1.png
```
---
## GET `/transcribe/{jobId}`
Fetches the result of a transcription job created with `POST /transcribe`.
+3 -4
View File
@@ -6,7 +6,7 @@ import initiateLogger from '../services/initiateLogger';
import createQueue from '../services/createQueue';
import FileStorage from '../services/FileStorage';
import router from './router';
import env, { EnvVariables } from '../env';
import env, { ComputedEnvVariables } from '../env';
import HtrCli from '../core/HtrCli';
import JobProcessor from '../workers/JobProcessor';
@@ -47,11 +47,10 @@ const init = async (logger: LoggerWrapper) => {
logger.info('Server started successfully');
};
const checkServerConfigurations = (envVariables: EnvVariables) => {
const checkServerConfigurations = (envVariables: ComputedEnvVariables) => {
if (!envVariables.API_KEY) throw Error('API_KEY environment variable not set.');
if (!envVariables.HTR_CLI_IMAGES_FOLDER) throw Error('HTR_CLI_IMAGES_FOLDER environment variable not set. This should point to a folder where images will be stored.');
if (!envVariables.DATA_DIR) throw Error('DATA_DIR environment variable not set. This should point to a folder where data will be stored.');
if (!envVariables.HTR_CLI_BINARY_PATH) throw Error('HTR_CLI_BINARY_PATH environment variable not set. This should point to the llama-mtmd-cli binary.');
if (!envVariables.HTR_CLI_MODELS_FOLDER) throw Error('HTR_CLI_MODELS_FOLDER environment variable not set. This should point to the folder containing the AI models.');
};
const main = async () => {
@@ -39,6 +39,7 @@ describe('createJob', () => {
sendToQueue: (data: JobData) => queue.send(data),
imageMaxDimension: 400,
randomName: 'test_file_resized-1',
imagesFolder: './images',
};
const result = await createJob(requirements);
const job = await queue.fetch();
@@ -58,6 +59,7 @@ describe('createJob', () => {
sendToQueue: (data: JobData) => queue.send(data),
imageMaxDimension: 400,
randomName: 'test_file_resized-2',
imagesFolder: './images',
};
expect(async () => createJob(requirements)).rejects.toThrow();
@@ -78,6 +80,7 @@ describe('createJob', () => {
sendToQueue: (data: JobData) => queue.send(data),
imageMaxDimension: 400,
randomName: 'test_file_resized-3',
imagesFolder: './images',
};
await createJob(requirements);
@@ -11,10 +11,11 @@ type CreateJobContext = {
filepath: string;
imageMaxDimension: number;
randomName: string;
imagesFolder: string;
};
const createJob = async (context: CreateJobContext) => {
const imageResizedPath = join('images', context.randomName);
const imageResizedPath = join(context.imagesFolder, context.randomName);
await resizeImageAndDeleteInput(context.filepath, imageResizedPath, context.imageMaxDimension);
@@ -1,23 +1,44 @@
import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage';
// Map MIME types to file extensions (for filenames) and detected extensions (from file-type library)
const mimeToFileExt: Record<string, string> = {
'image/png': 'png',
'image/jpeg': 'jpeg',
'image/bmp': 'bmp',
'application/zip': 'zip',
'application/pdf': 'pdf',
};
const mimeToDetectedExt: Record<string, string> = {
'image/png': 'png',
'image/jpeg': 'jpg',
'image/bmp': 'bmp',
'application/zip': 'zip',
'application/pdf': 'pdf',
};
describe('isFileAValidImage', () => {
it.each(
supportedImageFormat,
)('should be valid if the format is supported: %s', async (format: string) => {
const fileName = `sample.${format.split('/')[1]}`;
const fileExt = mimeToFileExt[format];
const detectedExt = mimeToDetectedExt[format];
const fileName = `sample.${fileExt}`;
const fullFilePath = `./test-cases/${fileName}`;
const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
expect(isValid).toBe(true);
expect(fileFormat).toBe(format);
expect(fileFormat).toBe(detectedExt);
});
it.each(['application/zip', 'application/pdf'])('should not be valid if the format is not supported: %s', async (format: string) => {
const fileName = `sample.${format.split('/')[1]}`;
const fileExt = mimeToFileExt[format];
const detectedExt = mimeToDetectedExt[format];
const fileName = `sample.${fileExt}`;
const fullFilePath = `./test-cases/${fileName}`;
const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
expect(isValid).toBe(false);
expect(fileFormat).toBe(format);
expect(fileFormat).toBe(detectedExt);
});
it('should throw an error if it is not possible to determine the type of the file', async () => {
@@ -10,7 +10,7 @@ const isFileAValidImage = async (filepath: string): Promise<[boolean, string]> =
}
const isValid = supportedImageFormat.includes(result.mime);
return [isValid, result.mime];
return [isValid, result.ext];
};
export default isFileAValidImage;
@@ -29,6 +29,7 @@ export const parseCreateJobRequest = async (ctx: AppContext) => {
filepath: file.filepath,
imageMaxDimension: env().IMAGE_MAX_DIMENSION,
randomName: createFilename(formatProvided),
imagesFolder: env().HTR_CLI_IMAGES_FOLDER,
};
};
+37 -17
View File
@@ -6,12 +6,10 @@ export const defaultEnvValues: EnvVariables = {
QUEUE_TTL: 15 * Minute,
QUEUE_RETRY_COUNT: 2,
QUEUE_MAINTENANCE_INTERVAL: 60 * Second,
HTR_CLI_IMAGES_FOLDER: '',
DATA_DIR: '',
HTR_CLI_BINARY_PATH: '',
HTR_CLI_MODELS_FOLDER: '',
QUEUE_DRIVER: 'pg', // 'sqlite'
QUEUE_DATABASE_PASSWORD: '',
QUEUE_DATABASE_NAME: '',
QUEUE_DATABASE_USER: '',
QUEUE_DATABASE_PORT: 5432,
FILE_STORAGE_MAINTENANCE_INTERVAL: 1 * Hour,
@@ -26,12 +24,10 @@ export interface EnvVariables {
QUEUE_TTL: number;
QUEUE_RETRY_COUNT: number;
QUEUE_MAINTENANCE_INTERVAL: number;
HTR_CLI_IMAGES_FOLDER: string;
DATA_DIR: string;
HTR_CLI_BINARY_PATH: string;
HTR_CLI_MODELS_FOLDER: string;
QUEUE_DRIVER: string;
QUEUE_DATABASE_PASSWORD: string;
QUEUE_DATABASE_NAME: string;
QUEUE_DATABASE_USER: string;
QUEUE_DATABASE_PORT: number;
FILE_STORAGE_MAINTENANCE_INTERVAL: number;
@@ -40,7 +36,13 @@ export interface EnvVariables {
IMAGE_MAX_DIMENSION: number;
}
export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariables {
export interface ComputedEnvVariables extends EnvVariables {
HTR_CLI_IMAGES_FOLDER: string;
HTR_CLI_MODELS_FOLDER: string;
QUEUE_DATABASE_NAME: string;
}
export function parseEnv(rawEnv: Record<string, string | undefined>): ComputedEnvVariables {
const output: EnvVariables = {
...defaultEnvValues,
};
@@ -48,7 +50,7 @@ export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariabl
for (const [key, value] of Object.entries(defaultEnvValues)) {
const rawEnvValue = rawEnv[key];
if (rawEnvValue === undefined) continue;
if (rawEnvValue === undefined || rawEnvValue === '') continue;
const typedKey = key as keyof EnvVariables;
@@ -63,19 +65,37 @@ export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariabl
}
}
return output;
// Derive paths from DATA_DIR
let queueDatabaseName: string;
if (output.QUEUE_DRIVER === 'sqlite') {
queueDatabaseName = `${output.DATA_DIR}/queue.sqlite3`;
} else {
// For PostgreSQL, use env var or default to 'transcribe'
queueDatabaseName = rawEnv['QUEUE_DATABASE_NAME'] || 'transcribe';
}
const computed: ComputedEnvVariables = {
...output,
HTR_CLI_IMAGES_FOLDER: `${output.DATA_DIR}/images`,
HTR_CLI_MODELS_FOLDER: `${output.DATA_DIR}/models`,
QUEUE_DATABASE_NAME: queueDatabaseName,
};
return computed;
}
// Should always be called after require('dotenv').config()
const env = () => {
return parseEnv(
Object.keys(defaultEnvValues)
.reduce((env: Record<string, string | undefined>, key) => {
env[key] = process.env[key];
return env;
}, {}),
);
const env = (): ComputedEnvVariables => {
const rawEnv = Object.keys(defaultEnvValues)
.reduce((env: Record<string, string | undefined>, key) => {
env[key] = process.env[key];
return env;
}, {} as Record<string, string | undefined>);
// Also include QUEUE_DATABASE_NAME for PostgreSQL driver
rawEnv['QUEUE_DATABASE_NAME'] = process.env['QUEUE_DATABASE_NAME'];
return parseEnv(rawEnv);
};
export default env;
@@ -1,11 +1,11 @@
import Logger from '@joplin/utils/Logger';
import PgBossQueue from './queue/PgBossQueue';
import SqliteQueue from './queue/SqliteQueue';
import { EnvVariables } from '../env';
import { ComputedEnvVariables } from '../env';
const logger = Logger.create('createQueue');
const createQueue = async (envVariables: EnvVariables, isPrimary: boolean) => {
const createQueue = async (envVariables: ComputedEnvVariables, isPrimary: boolean) => {
logger.info('Choosing queue');
if (envVariables.QUEUE_DRIVER === 'pg') {
+7 -4
View File
@@ -1,12 +1,15 @@
import { remove } from 'fs-extra';
import createQueue from './services/createQueue';
import env from './env';
import env, { ComputedEnvVariables } from './env';
export const initDb = async (sqliteFile: string) => {
const envVariables = env();
envVariables.QUEUE_DRIVER = 'sqlite';
envVariables.QUEUE_DATABASE_NAME = sqliteFile;
const queue = await createQueue(envVariables, true);
const testEnv: ComputedEnvVariables = {
...envVariables,
QUEUE_DRIVER: 'sqlite',
QUEUE_DATABASE_NAME: sqliteFile,
};
const queue = await createQueue(testEnv, true);
return queue;
};