mirror of
https://github.com/abiosoft/colima.git
synced 2026-05-17 12:10:34 +00:00
cb1aa93cb9
Signed-off-by: Abiola Ibrahim <git@abiosoft.com>
336 lines
11 KiB
Go
336 lines
11 KiB
Go
package model
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/abiosoft/colima/environment"
|
|
"github.com/abiosoft/colima/environment/host"
|
|
"github.com/abiosoft/colima/environment/vm/lima"
|
|
"github.com/abiosoft/colima/util/terminal"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// DockerModelInfo mirrors the JSON document printed by `docker model inspect`.
type DockerModelInfo struct {
	// ID is the model identifier; Hash expects it in the form "sha256:<hex>".
	ID string `json:"id"`
	// Tags holds the model's tags (e.g. "docker.io/ai/smollm2:latest").
	Tags []string `json:"tags"`
	// Config is the metadata reported under the "config" key.
	Config struct {
		Format       string `json:"format"` // e.g. "gguf"
		Quantization string `json:"quantization"`
		Parameters   string `json:"parameters"`
		Architecture string `json:"architecture"`
		Size         string `json:"size"`
	} `json:"config"`
}

// Hash returns the digest part of the model ID, i.e. the ID with its leading
// "sha256:" removed. An ID that does not start with "sha256:" yields the empty
// string.
func (m *DockerModelInfo) Hash() string {
	const prefix = "sha256:"
	if !strings.HasPrefix(m.ID, prefix) {
		return ""
	}
	return m.ID[len(prefix):]
}
|
|
|
|
// ociManifestLayer is one entry of an OCI manifest's "layers" array.
type ociManifestLayer struct {
	MediaType string `json:"mediaType"` // content type of the layer
	Digest    string `json:"digest"`    // content digest, e.g. "sha256:<hex>"
}

// ociManifest is the portion of an OCI manifest decoded for Docker models;
// only the layer list is read.
type ociManifest struct {
	Layers []ociManifestLayer `json:"layers"`
}
|
|
|
|
// findGGUFPath finds the GGUF file path for a model inside the docker-model-runner container.
|
|
// It handles both Docker registry models (bundle path) and HuggingFace models (blob path via manifest).
|
|
// For models without a bundle, it creates the bundle structure by hard-linking the blob.
|
|
func findGGUFPath(guest environment.VM, modelHash string) (string, error) {
|
|
// Standard bundle path used by Docker Model Runner for all models
|
|
bundlePath := fmt.Sprintf("/models/bundles/sha256/%s/model/model.gguf", modelHash)
|
|
|
|
// Check if bundle already exists
|
|
if err := guest.RunQuiet("docker", "exec", "docker-model-runner", "test", "-f", bundlePath); err == nil {
|
|
return bundlePath, nil
|
|
}
|
|
|
|
// Bundle doesn't exist - read manifest to find the GGUF blob and create the bundle
|
|
manifestPath := fmt.Sprintf("/models/manifests/sha256/%s", modelHash)
|
|
output, err := guest.RunOutput("docker", "exec", "docker-model-runner", "cat", manifestPath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to read model manifest: %w", err)
|
|
}
|
|
|
|
var manifest ociManifest
|
|
if err := json.Unmarshal([]byte(output), &manifest); err != nil {
|
|
return "", fmt.Errorf("failed to parse model manifest: %w", err)
|
|
}
|
|
|
|
// Find the GGUF layer (mediaType contains "gguf")
|
|
var blobPath string
|
|
for _, layer := range manifest.Layers {
|
|
if strings.Contains(layer.MediaType, "gguf") {
|
|
if blobHash, ok := strings.CutPrefix(layer.Digest, "sha256:"); ok {
|
|
blobPath = fmt.Sprintf("/models/blobs/sha256/%s", blobHash)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if blobPath == "" {
|
|
return "", fmt.Errorf("no GGUF layer found in model manifest")
|
|
}
|
|
|
|
// Create bundle directory and hard-link the blob (same approach as Docker Model Runner)
|
|
bundleDir := fmt.Sprintf("/models/bundles/sha256/%s/model", modelHash)
|
|
if err := guest.RunQuiet("docker", "exec", "docker-model-runner", "mkdir", "-p", bundleDir); err != nil {
|
|
return "", fmt.Errorf("failed to create bundle directory: %w", err)
|
|
}
|
|
|
|
if err := guest.RunQuiet("docker", "exec", "docker-model-runner", "ln", blobPath, bundlePath); err != nil {
|
|
return "", fmt.Errorf("failed to link model file: %w", err)
|
|
}
|
|
|
|
return bundlePath, nil
|
|
}
|
|
|
|
// InspectDockerModel returns information about a Docker model.
|
|
func InspectDockerModel(modelName string) (*DockerModelInfo, error) {
|
|
guest := lima.New(host.New())
|
|
output, err := guest.RunOutput("docker", "model", "inspect", modelName)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error inspecting model %q: %w", modelName, err)
|
|
}
|
|
|
|
var info DockerModelInfo
|
|
if err := json.Unmarshal([]byte(strings.TrimSpace(output)), &info); err != nil {
|
|
return nil, fmt.Errorf("error parsing model info: %w", err)
|
|
}
|
|
|
|
return &info, nil
|
|
}
|
|
|
|
// SetupOrUpdateDocker reinstalls Docker Model Runner in the VM.
|
|
func SetupOrUpdateDocker() error {
|
|
guest := lima.New(host.New())
|
|
|
|
log.Println("reinstalling Docker Model Runner...")
|
|
|
|
if err := guest.RunInteractive("docker", "model", "reinstall-runner"); err != nil {
|
|
return fmt.Errorf("error reinstalling Docker Model Runner: %w", err)
|
|
}
|
|
|
|
log.Println("Docker Model Runner reinstalled")
|
|
|
|
// Print installed version
|
|
if version := GetDockerModelVersion(); version != "" {
|
|
fmt.Println("Docker Model Runner")
|
|
fmt.Printf("version: %s", version)
|
|
fmt.Println()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetDockerModelVersion returns the Docker Model Runner version in the VM.
|
|
// Returns empty string if version cannot be determined.
|
|
func GetDockerModelVersion() string {
|
|
guest := lima.New(host.New())
|
|
output, err := guest.RunOutput("docker", "model", "version")
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(output)
|
|
}
|
|
|
|
// EnsureDockerModel ensures a Docker model is available, pulling if necessary.
|
|
// Returns the normalized model name (resolving aliases like hf.co → huggingface.co).
|
|
func EnsureDockerModel(modelName string) (string, error) {
|
|
guest := lima.New(host.New())
|
|
|
|
// Try to inspect the model first
|
|
modelInfo, err := InspectDockerModel(modelName)
|
|
if err != nil {
|
|
// Model not found locally, try to pull it
|
|
if pullErr := guest.RunInteractive("docker", "model", "pull", modelName); pullErr != nil {
|
|
return "", fmt.Errorf("failed to pull model %q: %w", modelName, pullErr)
|
|
}
|
|
// Retry inspect after pull
|
|
modelInfo, err = InspectDockerModel(modelName)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to inspect model %q after pull: %w", modelName, err)
|
|
}
|
|
}
|
|
|
|
// Return the first tag as the normalized name (e.g., "docker.io/ai/smollm2:latest")
|
|
if len(modelInfo.Tags) > 0 {
|
|
return modelInfo.Tags[0], nil
|
|
}
|
|
return modelName, nil
|
|
}
|
|
|
|
// DockerModelServeConfig describes how ServeDockerModel should serve a model.
type DockerModelServeConfig struct {
	// ModelName is the model to serve (e.g., "smollm2").
	ModelName string
	// Port is the port the model is exposed on.
	Port int
	// Threads is the number of CPU threads; values <= 0 select the default of 2.
	Threads int
	// GPULayers is the number of GPU layers; values <= 0 select the default
	// of 999 (all layers).
	GPULayers int
}
|
|
|
|
// ServeDockerModel serves a Docker model with llama-server.
|
|
// It runs llama-server interactively (with visible output) and uses socat to forward the port.
|
|
// The function blocks until interrupted (Ctrl-C) or llama-server exits.
|
|
// Note: Call EnsureDockerModel first to ensure the model is available.
|
|
func ServeDockerModel(cfg DockerModelServeConfig) error {
|
|
guest := lima.New(host.New())
|
|
|
|
// Set defaults
|
|
if cfg.Threads <= 0 {
|
|
cfg.Threads = 2
|
|
}
|
|
if cfg.GPULayers <= 0 {
|
|
cfg.GPULayers = 999
|
|
}
|
|
|
|
// Get the model info (model should already be available via EnsureDockerModel)
|
|
modelInfo, err := InspectDockerModel(cfg.ModelName)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to inspect model %q: %w", cfg.ModelName, err)
|
|
}
|
|
|
|
// Check model format - only GGUF models are supported
|
|
if modelInfo.Config.Format != "gguf" {
|
|
return fmt.Errorf("model %q has format %q, only GGUF models are supported\n"+
|
|
"Try a GGUF version of this model (e.g., from TheBloke on HuggingFace)",
|
|
cfg.ModelName, modelInfo.Config.Format)
|
|
}
|
|
|
|
modelHash := modelInfo.Hash()
|
|
if modelHash == "" {
|
|
return fmt.Errorf("could not determine hash for model %q", cfg.ModelName)
|
|
}
|
|
|
|
// Ensure docker-model-runner container is running (needed to find GGUF path)
|
|
if err := ensureDockerModelRunner(guest); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Find the GGUF file path (handles both Docker registry and HuggingFace models)
|
|
ggufPath, err := findGGUFPath(guest, modelHash)
|
|
if err != nil {
|
|
return fmt.Errorf("could not find GGUF file for model %q: %w", cfg.ModelName, err)
|
|
}
|
|
|
|
// Get container IP
|
|
containerIP, err := getDockerModelRunnerIP(guest)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Kill any existing socat on this port
|
|
stopSocat(guest, cfg.Port)
|
|
|
|
// Start socat in background to forward localhost:port → container_ip:port
|
|
if err := startSocat(guest, cfg.Port, containerIP); err != nil {
|
|
return fmt.Errorf("failed to start port forwarder: %w", err)
|
|
}
|
|
|
|
// Run llama-server interactively (blocking, with visible output)
|
|
// Ctrl-C will be received by the interactive process directly
|
|
// Use -it for TTY, -i for non-TTY (e.g., piped or CI environments)
|
|
execFlag := "-i"
|
|
if terminal.IsTerminal() {
|
|
execFlag = "-it"
|
|
}
|
|
|
|
err = guest.RunInteractive("docker", "exec", execFlag, "docker-model-runner",
|
|
"/app/bin/com.docker.llama-server",
|
|
"-ngl", fmt.Sprintf("%d", cfg.GPULayers),
|
|
"--metrics",
|
|
"--threads", fmt.Sprintf("%d", cfg.Threads),
|
|
"--model", ggufPath,
|
|
"--alias", cfg.ModelName,
|
|
"--host", "0.0.0.0",
|
|
"--port", fmt.Sprintf("%d", cfg.Port),
|
|
"--jinja",
|
|
)
|
|
|
|
// Cleanup socat on exit (whether normal exit or Ctrl-C)
|
|
stopSocat(guest, cfg.Port)
|
|
|
|
return err
|
|
}
|
|
|
|
// ensureDockerModelRunner ensures the docker-model-runner container is running.
|
|
// Attempts to start it up to 3 times if not found.
|
|
func ensureDockerModelRunner(guest environment.VM) error {
|
|
for attempt := 1; attempt <= 3; attempt++ {
|
|
// Check if container exists
|
|
if err := guest.RunQuiet("docker", "inspect", "docker-model-runner"); err == nil {
|
|
return nil
|
|
}
|
|
|
|
log.Infof("docker-model-runner not found, starting it (attempt %d/3)...", attempt)
|
|
_ = guest.Run("docker", "model", "start-runner")
|
|
time.Sleep(2 * time.Second)
|
|
}
|
|
|
|
return fmt.Errorf("could not start docker-model-runner after 3 attempts")
|
|
}
|
|
|
|
// getDockerModelRunnerIP returns the IP address of the docker-model-runner container.
|
|
func getDockerModelRunnerIP(guest environment.VM) (string, error) {
|
|
output, err := guest.RunOutput("docker", "inspect", "docker-model-runner",
|
|
"--format", "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}")
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get container IP: %w", err)
|
|
}
|
|
|
|
ip := strings.TrimSpace(output)
|
|
if ip == "" {
|
|
return "", fmt.Errorf("container IP is empty")
|
|
}
|
|
|
|
return ip, nil
|
|
}
|
|
|
|
// startSocat starts socat in the background to forward a port to the container.
|
|
func startSocat(guest environment.VM, port int, containerIP string) error {
|
|
cmd := fmt.Sprintf("nohup socat TCP-LISTEN:%d,fork,reuseaddr TCP:%s:%d > /dev/null 2>&1 &",
|
|
port, containerIP, port)
|
|
return guest.Run("sh", "-c", cmd)
|
|
}
|
|
|
|
// stopSocat stops the socat process for a given port.
|
|
func stopSocat(guest environment.VM, port int) {
|
|
cmd := fmt.Sprintf("pkill -f 'socat.*TCP-LISTEN:%d' 2>/dev/null || true", port)
|
|
_ = guest.Run("sh", "-c", cmd)
|
|
}
|
|
|
|
// StopDockerModelServe stops a Docker model serve instance.
|
|
func StopDockerModelServe(port int) error {
|
|
guest := lima.New(host.New())
|
|
|
|
// Stop the socat proxy on the VM
|
|
stopCmd := fmt.Sprintf("pkill -f 'socat.*TCP-LISTEN:%d' 2>/dev/null || true", port)
|
|
if err := guest.Run("sh", "-c", stopCmd); err != nil {
|
|
log.Debugf("error stopping socat: %v", err)
|
|
}
|
|
|
|
// Note: llama-server processes inside docker-model-runner are harder to clean up
|
|
// since they run in the same container. For now, we just stop the socat proxy.
|
|
// The llama-server process will remain running but be inaccessible.
|
|
|
|
return nil
|
|
}
|
|
|
|
// IsDockerModelServeRunning checks if a serve instance is running on the given port.
|
|
func IsDockerModelServeRunning(port int) bool {
|
|
guest := lima.New(host.New())
|
|
|
|
// Check if socat is running for this port
|
|
checkCmd := fmt.Sprintf("pgrep -f 'socat.*TCP-LISTEN:%d' > /dev/null 2>&1", port)
|
|
err := guest.Run("sh", "-c", checkCmd)
|
|
return err == nil
|
|
}
|