Add vision support in llama-server (#901)

* server: add support for vision model
webui: add support for vision model

* server : remove hack for extra parallel slot #10187

* llama : fix KV shift for qwen2vl #13870

* add no-context-shift parameter

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-11-05 08:43:46 +00:00
committed by GitHub
parent 5b38d431ac
commit 15159a87d4
26 changed files with 2456 additions and 729 deletions

View File

@@ -1,6 +1,6 @@
// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { APIMessage, Message } from './types';
import { APIMessage, Message, LlamaCppServerProps, APIMessageContentPart } from './types';
// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
@@ -57,21 +57,55 @@ export const copyStr = (textToCopy: string) => {
*/
export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
  return messages.map((msg) => {
    // Non-user messages (and user messages without attachments) keep the
    // plain-string content form expected by the chat completion API.
    if (msg.role !== 'user' || !msg.extra) {
      return {
        role: msg.role,
        content: msg.content,
      } as APIMessage;
    }
    // extra content first, then user text message in the end
    // this allows re-using the same cache prefix for long context
    const contentArr: APIMessageContentPart[] = [];
    for (const extra of msg.extra ?? []) {
      if (extra.type === 'context') {
        contentArr.push({
          type: 'text',
          text: extra.content,
        });
      } else if (extra.type === 'textFile') {
        contentArr.push({
          type: 'text',
          text: `File: ${extra.name}\nContent:\n\n${extra.content}`,
        });
      } else if (extra.type === 'imageFile') {
        contentArr.push({
          type: 'image_url',
          image_url: { url: extra.base64Url },
        });
      } else if (extra.type === 'audioFile') {
        contentArr.push({
          type: 'input_audio',
          input_audio: {
            data: extra.base64Data,
            // only wav and mp3 are accepted by the API; anything non-wav
            // is sent as mp3
            format: /wav/.test(extra.mimeType) ? 'wav' : 'mp3',
          },
        });
      } else {
        throw new Error('Unknown extra type');
      }
    }
    // add user message to the end
    contentArr.push({
      type: 'text',
      text: msg.content,
    });
    return {
      role: msg.role,
      content: contentArr,
    };
  }) as APIMessage[];
}
@@ -137,3 +171,25 @@ export const cleanCurrentUrl = (removeQueryParams: string[]) => {
});
window.history.replaceState({}, '', url.toString());
};
/**
 * Fetch the llama-server properties from the `/props` endpoint.
 * Attaches a bearer token when an API key is given; logs and rethrows
 * any failure so callers can surface the error themselves.
 */
export const getServerProps = async (
  baseUrl: string,
  apiKey?: string
): Promise<LlamaCppServerProps> => {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }
  try {
    const response = await fetch(`${baseUrl}/props`, { headers });
    if (!response.ok) {
      throw new Error('Failed to fetch server props');
    }
    return (await response.json()) as LlamaCppServerProps;
  } catch (error) {
    console.error('Error fetching server props:', error);
    throw error;
  }
};