Chore: Refactor and simplify Transcribe server (#14462)

2026-05-07 20:02:45 +00:00 · 2026-02-25 17:46:08 +00:00
parent 5d9a6151ea
commit d75d0df88a
14 changed files with 181 additions and 91 deletions
@@ -17,3 +17,4 @@ packages/server/db-*.sqlite
 packages/server/dist/
 packages/server/logs/
 packages/server/temp/
+packages/transcribe/.env
@@ -1,35 +1,33 @@
+# Joplin Transcribe Configuration
+#
+# Copy this file to .env-transcribe and update the values.
+
 # =============================================================================
 # Required
-# -----------------------------------------------------------------------------
 # =============================================================================

-SERVER_PORT=4567
+# Set a secure API key for authentication
+API_KEY=changeme

-API_KEY=random-string
-QUEUE_TTL=900000
-QUEUE_RETRY_COUNT=2
-QUEUE_MAINTENANCE_INTERVAL=30000
-IMAGE_MAX_DIMENSION=400
+# =============================================================================
+# Optional (defaults are set in the Docker image)
+# =============================================================================

-HTR_CLI_DOCKER_IMAGE=joplin/htr-cli:latest
-# Fullpath to images folder e.g.:
-#HTR_CLI_IMAGES_FOLDER=/home/user/joplin/packages/transcribe/images
-HTR_CLI_IMAGES_FOLDER=
+# Server port (default: 4567)
+# SERVER_PORT=4567

-QUEUE_DRIVER=pg
+# Maximum image dimension for processing (default: 400)
+# IMAGE_MAX_DIMENSION=400
+
+# Queue driver: sqlite (default in the Docker image) or pg
 # QUEUE_DRIVER=sqlite

-FILE_STORAGE_MAINTENANCE_INTERVAL=3600000
-FILE_STORAGE_TTL=604800000 # one week
+# =============================================================================
+# PostgreSQL settings (only if QUEUE_DRIVER=pg)
+# =============================================================================

-# =============================================================================
-# Queue driver
-# -----------------------------------------------------------------------------
-# =============================================================================
-#
-# QUEUE_DATABASE_NAME=./queue.sqlite3
-QUEUE_DATABASE_NAME=transcribe
-QUEUE_DATABASE_USER=transcribe
-QUEUE_DATABASE_PASSWORD=transcribe
-QUEUE_DATABASE_PORT=5432
-QUEUE_DATABASE_HOST=localhost
+# QUEUE_DATABASE_NAME=transcribe
+# QUEUE_DATABASE_USER=transcribe
+# QUEUE_DATABASE_PASSWORD=transcribe
+# QUEUE_DATABASE_PORT=5432
+# QUEUE_DATABASE_HOST=localhost
@@ -18,7 +18,7 @@ RUN wget -q https://github.com/ggml-org/llama.cpp/releases/download/b5449/llama-
    && chmod +x /opt/llama/build/bin/llama-mtmd-cli

 # Create non-root user for security
-RUN groupadd -r transcribe && useradd -r -g transcribe transcribe
+RUN groupadd -r transcribe && useradd -r -g transcribe -m transcribe

 WORKDIR /app

@@ -43,17 +43,20 @@ RUN BUILD_SEQUENCIAL=1 yarn install --inline-builds \
    && yarn cache clean \
    && rm -rf .yarn/berry

-# Create images directory and set permissions
-RUN mkdir -p /app/packages/transcribe/images \
-    && chown -R transcribe:transcribe /app/packages/transcribe/images
+# Create data directory and set permissions
+RUN mkdir -p /data/images \
+    && chown -R transcribe:transcribe /data

 WORKDIR /app/packages/transcribe

 # Switch to non-root user
 USER transcribe

-# Set environment variable for embedded llama.cpp binary
+# Set environment variables
 ENV HTR_CLI_BINARY_PATH=/opt/llama/build/bin/llama-mtmd-cli
+ENV DATA_DIR=/data
+ENV QUEUE_DRIVER=sqlite

 # Start the Node.js application
+ENTRYPOINT ["/usr/bin/tini", "--"]
 CMD ["yarn", "start"]
@@ -0,0 +1,44 @@
+# Standalone docker-compose for Joplin Transcribe
+#
+# Uses SQLite for the queue (no external database needed).
+# Queue and image data are stored in a named volume for proper permissions;
+# models are bind-mounted read-only from ./data/models.
+#
+# Usage:
+#
+#   1. Download models:
+#      mkdir -p ./data/models
+#      wget -O ./data/models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
+#      wget -O ./data/models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
+#
+#   2. Configure:
+#      cp .env-transcribe-sample .env
+#      # Edit .env and set API_KEY
+#
+#   3. Run:
+#      docker compose -f docker-compose.transcribe.yml up
+
+volumes:
+    transcribe-data:
+
+services:
+    transcribe:
+        image: joplin/transcribe:amd64-latest
+        ports:
+            - "4567:4567"
+        volumes:
+            - transcribe-data:/data
+            - ./data/models:/data/models:ro
+        restart: unless-stopped
+        # Security: limit resources to prevent runaway processes
+        deploy:
+            resources:
+                limits:
+                    memory: 16G
+                    cpus: '4'
+        # Security: read-only root filesystem
+        read_only: true
+        tmpfs:
+            - /tmp
+            - /home/transcribe/.cache
+        env_file:
+            - .env
@@ -4,34 +4,34 @@

 The transcribe server embeds the llama.cpp binary directly in the Docker image. The AI models must be downloaded separately and mounted as a volume.

-### 1. Download the models
-
-Create a directory for the models and download them:
+### 1. Create data directory and download models

 ```shell
-mkdir -p ./data/transcribe-models
-wget -O ./data/transcribe-models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
-wget -O ./data/transcribe-models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
+mkdir -p ./data/models
+chmod 755 ./data
+wget -O ./data/models/Model-7.6B-Q4_K_M.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/Model-7.6B-Q4_K_M.gguf
+wget -O ./data/models/mmproj-model-f16.gguf https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf/resolve/main/mmproj-model-f16.gguf
 ```

 ### 2. Configure environment

 1. Copy `.env-transcribe-sample` to your Docker configuration directory.
 2. Rename it to `.env-transcribe`.
+3. Set `API_KEY` to a secure value.

 ### 3. Run the server

-The models directory on your host is mounted into the container at `/opt/models`. The `HTR_CLI_MODELS_FOLDER` environment variable refers to the path inside the container, not the host path.
-
 ```shell
-docker build -f ./Dockerfile.transcribe -t transcribe .
-docker run --env-file .env-transcribe -p 4567:4567 \
-	-v ./packages/transcribe/images:/app/packages/transcribe/images \
-	-v ./data/transcribe-models:/opt/models:ro \
-	-e HTR_CLI_MODELS_FOLDER=/opt/models \
-	transcribe
+docker run --rm --env-file .env-transcribe -p 4567:4567 \
+	-v ./data:/data \
+	joplin/transcribe:amd64-latest
 ```

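+The container uses the following paths inside `/data` (`images/` and `queue.sqlite3` are created automatically; `models/` must already contain the models downloaded in step 1):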
+- `images/` - uploaded images
+- `models/` - AI models (you provide these)
+- `queue.sqlite3` - job queue database
+
 ## Using Docker Compose

 The minimal configuration is provided in `.env-sample` and `docker-compose.server.yml`.
@@ -55,8 +55,6 @@ The transcribe container runs with these security measures:
 - **Resource limits**: Memory and CPU limits prevent runaway processes
 - **No Docker socket**: Unlike previous versions, no Docker socket mount is required

---
-
 # Development Setup

 ## Testing
@@ -70,13 +68,6 @@ Run all tests with:
 yarn test-all
 ```

-## Database Setup
-
-The queue driver can be **SQLite** or **PostgreSQL**:
-
-* Set `QUEUE_DRIVER` to `sqlite` or `pg`.
-* If using SQLite, `QUEUE_DATABASE_NAME` specifies the path to the database file.
-
 ## Starting the Server

 From `packages/transcribe`, run:
@@ -87,11 +78,18 @@ yarn start

 ### Environment variables

+Required:
+- `API_KEY`: Authentication key for API requests
+- `DATA_DIR`: Base directory for all data (images, models, database)
 - `HTR_CLI_BINARY_PATH`: Path to the llama-mtmd-cli binary
- `HTR_CLI_MODELS_FOLDER`: Path to the models directory
- `HTR_CLI_IMAGES_FOLDER`: Path where uploaded images are stored

---
+Optional:
+- `QUEUE_DRIVER`: `sqlite` (default in Docker) or `pg` for PostgreSQL
+
+The following paths are automatically derived from `DATA_DIR`:
+- `$DATA_DIR/images` - uploaded images
+- `$DATA_DIR/models` - AI models
+- `$DATA_DIR/queue.sqlite3` - SQLite database (when using sqlite driver)

 # API Endpoints

@@ -124,8 +122,6 @@ curl --request POST \
 	--form file=@/home/js/Pictures/2025-07-24_17-42_1.png
 ```

---
-
 ## GET `/transcribe/{jobId}`

 Fetches the result of a transcription job created with `POST /transcribe`.
@@ -6,7 +6,7 @@ import initiateLogger from '../services/initiateLogger';
 import createQueue from '../services/createQueue';
 import FileStorage from '../services/FileStorage';
 import router from './router';
-import env, { EnvVariables } from '../env';
+import env, { ComputedEnvVariables } from '../env';
 import HtrCli from '../core/HtrCli';
 import JobProcessor from '../workers/JobProcessor';

@@ -47,11 +47,10 @@ const init = async (logger: LoggerWrapper) => {
 	logger.info('Server started successfully');
 };

-const checkServerConfigurations = (envVariables: EnvVariables) => {
+const checkServerConfigurations = (envVariables: ComputedEnvVariables) => {
 	if (!envVariables.API_KEY) throw Error('API_KEY environment variable not set.');
-	if (!envVariables.HTR_CLI_IMAGES_FOLDER) throw Error('HTR_CLI_IMAGES_FOLDER environment variable not set. This should point to a folder where images will be stored.');
+	if (!envVariables.DATA_DIR) throw Error('DATA_DIR environment variable not set. This should point to a folder where data will be stored.');
 	if (!envVariables.HTR_CLI_BINARY_PATH) throw Error('HTR_CLI_BINARY_PATH environment variable not set. This should point to the llama-mtmd-cli binary.');
-	if (!envVariables.HTR_CLI_MODELS_FOLDER) throw Error('HTR_CLI_MODELS_FOLDER environment variable not set. This should point to the folder containing the AI models.');
 };

 const main = async () => {
@@ -39,6 +39,7 @@ describe('createJob', () => {
 			sendToQueue: (data: JobData) => queue.send(data),
 			imageMaxDimension: 400,
 			randomName: 'test_file_resized-1',
+			imagesFolder: './images',
 		};
 		const result = await createJob(requirements);
 		const job = await queue.fetch();
@@ -58,6 +59,7 @@ describe('createJob', () => {
 			sendToQueue: (data: JobData) => queue.send(data),
 			imageMaxDimension: 400,
 			randomName: 'test_file_resized-2',
+			imagesFolder: './images',
 		};

 		expect(async () => createJob(requirements)).rejects.toThrow();
@@ -78,6 +80,7 @@ describe('createJob', () => {
 			sendToQueue: (data: JobData) => queue.send(data),
 			imageMaxDimension: 400,
 			randomName: 'test_file_resized-3',
+			imagesFolder: './images',
 		};

 		await createJob(requirements);
@@ -11,10 +11,11 @@ type CreateJobContext = {
 	filepath: string;
 	imageMaxDimension: number;
 	randomName: string;
+	imagesFolder: string;
 };

 const createJob = async (context: CreateJobContext) => {
-	const imageResizedPath = join('images', context.randomName);
+	const imageResizedPath = join(context.imagesFolder, context.randomName);

 	await resizeImageAndDeleteInput(context.filepath, imageResizedPath, context.imageMaxDimension);

@@ -1,23 +1,44 @@
 import isFileAValidImage, { supportedImageFormat } from './isFileAValidImage';

+// Map MIME types to file extensions (for filenames) and detected extensions (from file-type library)
+const mimeToFileExt: Record<string, string> = {
+	'image/png': 'png',
+	'image/jpeg': 'jpeg',
+	'image/bmp': 'bmp',
+	'application/zip': 'zip',
+	'application/pdf': 'pdf',
+};
+
+const mimeToDetectedExt: Record<string, string> = {
+	'image/png': 'png',
+	'image/jpeg': 'jpg',
+	'image/bmp': 'bmp',
+	'application/zip': 'zip',
+	'application/pdf': 'pdf',
+};
+
 describe('isFileAValidImage', () => {

 	it.each(
 		supportedImageFormat,
 	)('should be valid if the format is supported: %s', async (format: string) => {
-		const fileName = `sample.${format.split('/')[1]}`;
+		const fileExt = mimeToFileExt[format];
+		const detectedExt = mimeToDetectedExt[format];
+		const fileName = `sample.${fileExt}`;
 		const fullFilePath = `./test-cases/${fileName}`;
 		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
 		expect(isValid).toBe(true);
-		expect(fileFormat).toBe(format);
+		expect(fileFormat).toBe(detectedExt);
 	});

 	it.each(['application/zip', 'application/pdf'])('should not be valid if the format is not supported: %s', async (format: string) => {
-		const fileName = `sample.${format.split('/')[1]}`;
+		const fileExt = mimeToFileExt[format];
+		const detectedExt = mimeToDetectedExt[format];
+		const fileName = `sample.${fileExt}`;
 		const fullFilePath = `./test-cases/${fileName}`;
 		const [isValid, fileFormat] = await isFileAValidImage(fullFilePath);
 		expect(isValid).toBe(false);
-		expect(fileFormat).toBe(format);
+		expect(fileFormat).toBe(detectedExt);
 	});

 	it('should throw an error if it is not possible to determine the type of the file', async () => {
@@ -10,7 +10,7 @@ const isFileAValidImage = async (filepath: string): Promise<[boolean, string]> =
 	}

 	const isValid = supportedImageFormat.includes(result.mime);
-	return [isValid, result.mime];
+	return [isValid, result.ext];
 };

 export default isFileAValidImage;
@@ -29,6 +29,7 @@ export const parseCreateJobRequest = async (ctx: AppContext) => {
 		filepath: file.filepath,
 		imageMaxDimension: env().IMAGE_MAX_DIMENSION,
 		randomName: createFilename(formatProvided),
+		imagesFolder: env().HTR_CLI_IMAGES_FOLDER,
 	};
 };

@@ -6,12 +6,10 @@ export const defaultEnvValues: EnvVariables = {
 	QUEUE_TTL: 15 * Minute,
 	QUEUE_RETRY_COUNT: 2,
 	QUEUE_MAINTENANCE_INTERVAL: 60 * Second,
-	HTR_CLI_IMAGES_FOLDER: '',
+	DATA_DIR: '',
 	HTR_CLI_BINARY_PATH: '',
-	HTR_CLI_MODELS_FOLDER: '',
 	QUEUE_DRIVER: 'pg', // 'sqlite'
 	QUEUE_DATABASE_PASSWORD: '',
-	QUEUE_DATABASE_NAME: '',
 	QUEUE_DATABASE_USER: '',
 	QUEUE_DATABASE_PORT: 5432,
 	FILE_STORAGE_MAINTENANCE_INTERVAL: 1 * Hour,
@@ -26,12 +24,10 @@ export interface EnvVariables {
 	QUEUE_TTL: number;
 	QUEUE_RETRY_COUNT: number;
 	QUEUE_MAINTENANCE_INTERVAL: number;
-	HTR_CLI_IMAGES_FOLDER: string;
+	DATA_DIR: string;
 	HTR_CLI_BINARY_PATH: string;
-	HTR_CLI_MODELS_FOLDER: string;
 	QUEUE_DRIVER: string;
 	QUEUE_DATABASE_PASSWORD: string;
-	QUEUE_DATABASE_NAME: string;
 	QUEUE_DATABASE_USER: string;
 	QUEUE_DATABASE_PORT: number;
 	FILE_STORAGE_MAINTENANCE_INTERVAL: number;
@@ -40,7 +36,13 @@ export interface EnvVariables {
 	IMAGE_MAX_DIMENSION: number;
 }

-export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariables {
+export interface ComputedEnvVariables extends EnvVariables {
+	HTR_CLI_IMAGES_FOLDER: string;
+	HTR_CLI_MODELS_FOLDER: string;
+	QUEUE_DATABASE_NAME: string;
+}
+
+export function parseEnv(rawEnv: Record<string, string | undefined>): ComputedEnvVariables {
 	const output: EnvVariables = {
 		...defaultEnvValues,
 	};
@@ -48,7 +50,7 @@ export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariabl
 	for (const [key, value] of Object.entries(defaultEnvValues)) {
 		const rawEnvValue = rawEnv[key];

-		if (rawEnvValue === undefined) continue;
+		if (rawEnvValue === undefined || rawEnvValue === '') continue;

 		const typedKey = key as keyof EnvVariables;

@@ -63,19 +65,37 @@ export function parseEnv(rawEnv: Record<string, string | undefined>): EnvVariabl
 		}
 	}

-	return output;
+	// Derive paths from DATA_DIR
+	let queueDatabaseName: string;
+	if (output.QUEUE_DRIVER === 'sqlite') {
+		queueDatabaseName = `${output.DATA_DIR}/queue.sqlite3`;
+	} else {
+		// For PostgreSQL, use env var or default to 'transcribe'
+		queueDatabaseName = rawEnv['QUEUE_DATABASE_NAME'] || 'transcribe';
+	}
+
+	const computed: ComputedEnvVariables = {
+		...output,
+		HTR_CLI_IMAGES_FOLDER: `${output.DATA_DIR}/images`,
+		HTR_CLI_MODELS_FOLDER: `${output.DATA_DIR}/models`,
+		QUEUE_DATABASE_NAME: queueDatabaseName,
+	};
+
+	return computed;
 }

 // Should always be called after require('dotenv').config()
-const env = () => {
-	return parseEnv(
-		Object.keys(defaultEnvValues)
-			.reduce((env: Record<string, string | undefined>, key) => {
-				env[key] = process.env[key];
-				return env;
-			}, {}),
-	);
+const env = (): ComputedEnvVariables => {
+	const rawEnv = Object.keys(defaultEnvValues)
+		.reduce((env: Record<string, string | undefined>, key) => {
+			env[key] = process.env[key];
+			return env;
+		}, {} as Record<string, string | undefined>);

+	// Also include QUEUE_DATABASE_NAME for PostgreSQL driver
+	rawEnv['QUEUE_DATABASE_NAME'] = process.env['QUEUE_DATABASE_NAME'];
+
+	return parseEnv(rawEnv);
 };

 export default env;
@@ -1,11 +1,11 @@
 import Logger from '@joplin/utils/Logger';
 import PgBossQueue from './queue/PgBossQueue';
 import SqliteQueue from './queue/SqliteQueue';
-import { EnvVariables } from '../env';
+import { ComputedEnvVariables } from '../env';

 const logger = Logger.create('createQueue');

-const createQueue = async (envVariables: EnvVariables, isPrimary: boolean) => {
+const createQueue = async (envVariables: ComputedEnvVariables, isPrimary: boolean) => {
 	logger.info('Choosing queue');

 	if (envVariables.QUEUE_DRIVER === 'pg') {
@@ -1,12 +1,15 @@
 import { remove } from 'fs-extra';
 import createQueue from './services/createQueue';
-import env from './env';
+import env, { ComputedEnvVariables } from './env';

 export const initDb = async (sqliteFile: string) => {
 	const envVariables = env();
-	envVariables.QUEUE_DRIVER = 'sqlite';
-	envVariables.QUEUE_DATABASE_NAME = sqliteFile;
-	const queue = await createQueue(envVariables, true);
+	const testEnv: ComputedEnvVariables = {
+		...envVariables,
+		QUEUE_DRIVER: 'sqlite',
+		QUEUE_DATABASE_NAME: sqliteFile,
+	};
+	const queue = await createQueue(testEnv, true);
 	return queue;
 };