Add vision support in llama-server (#901)

* server: add support for vision model
webui: add support for vision model

* server : remove hack for extra parallel slot #10187

* llama : fix KV shift for qwen2vl #13870

* add no-context-shift parameter

---------

Co-authored-by: firecoperana <firecoperana>
Author: firecoperana
Committed: 2025-11-05 08:43:46 +00:00 (via GitHub)
Parent: 92607d44c4
Commit: 7978f04996
26 changed files with 2456 additions and 729 deletions


@@ -3,6 +3,7 @@ import {
APIMessage,
CanvasData,
Conversation,
LlamaCppServerProps,
Message,
PendingMessage,
ViewingChat,
@@ -12,6 +13,7 @@ import {
filterThoughtFromMsgs,
normalizeMsgsForAPI,
getSSEStreamAsync,
getServerProps
} from './misc';
import { BASE_URL, CONFIG_DEFAULT, isDev } from '../Config';
import { matchPath, useLocation, useNavigate } from 'react-router';
@@ -54,6 +56,10 @@ interface AppContextValue {
saveConfig: (config: typeof CONFIG_DEFAULT) => void;
showSettings: boolean;
setShowSettings: (show: boolean) => void;
// props
serverProps: LlamaCppServerProps | null;
}
// this callback is used for scrolling to the bottom of the chat and switching to the last node
@@ -82,6 +88,9 @@ export const AppContextProvider = ({
const params = matchPath('/chat/:convId', pathname);
const convId = params?.params?.convId;
const [serverProps, setServerProps] = useState<LlamaCppServerProps | null>(
null
);
const [viewingChat, setViewingChat] = useState<ViewingChat | null>(null);
const [pendingMessages, setPendingMessages] = useState<
Record<Conversation['id'], PendingMessage>
@@ -93,6 +102,20 @@ export const AppContextProvider = ({
const [canvasData, setCanvasData] = useState<CanvasData | null>(null);
const [showSettings, setShowSettings] = useState(false);
// get server props
useEffect(() => {
getServerProps(BASE_URL, config.apiKey)
.then((props) => {
console.debug('Server props:', props);
setServerProps(props);
})
.catch((err) => {
console.error(err);
toast.error('Failed to fetch server props');
});
// eslint-disable-next-line
}, []);
// handle change when the convId from URL is changed
useEffect(() => {
// also reset the canvas data
@@ -469,6 +492,7 @@ export const AppContextProvider = ({
saveConfig,
showSettings,
setShowSettings,
serverProps,
}}
>
{children}
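
The `serverProps` exposed through the app context is what lets the rest of the webui discover whether the loaded model has vision (or audio) support. Below is a minimal sketch of how a component might gate an image-upload control on it; `useAppContext` is assumed here to be the hook that reads `AppContextValue`, and the button itself is purely illustrative:

```tsx
// Sketch only: gating an attach-image control on the server's reported modalities.
// serverProps stays null until GET /props resolves, so treat that as "not yet known".
import { useAppContext } from '../utils/app.context'; // assumed export location

export function AttachImageButton() {
  const { serverProps } = useAppContext();
  const visionSupported = serverProps?.modalities?.vision ?? false;

  return (
    <button
      disabled={!visionSupported}
      title={
        visionSupported
          ? 'Attach an image'
          : 'The loaded model does not support image input'
      }
    >
      Attach image
    </button>
  );
}
```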


@@ -1,6 +1,6 @@
import { useEffect, useState } from 'react';
import { MessageExtraContext } from './types';
import { useEffect } from 'react';
import { ChatTextareaApi } from '../components/useChatTextarea.ts';
import { ChatExtraContextApi } from '../components/useChatExtraContext.tsx';
// Extra context when using llama.cpp WebUI from llama-vscode, inside an iframe
// Ref: https://github.com/ggml-org/llama.cpp/pull/11940
@@ -15,11 +15,10 @@ interface SetTextEvData {
* window.postMessage({ command: 'setText', text: 'Spot the syntax error', context: 'def test()\n return 123' }, '*');
*/
export const useVSCodeContext = (textarea: ChatTextareaApi) => {
const [extraContext, setExtraContext] = useState<MessageExtraContext | null>(
null
);
export const useVSCodeContext = (
textarea: ChatTextareaApi,
extraContext: ChatExtraContextApi
) => {
// Accept setText message from a parent window and set inputMsg and extraContext
useEffect(() => {
const handleMessage = (event: MessageEvent) => {
@@ -27,18 +26,25 @@ export const useVSCodeContext = (textarea: ChatTextareaApi) => {
const data: SetTextEvData = event.data;
textarea.setValue(data?.text);
if (data?.context && data.context.length > 0) {
setExtraContext({
type: 'context',
content: data.context,
});
extraContext.clearItems();
extraContext.addItems([
{
type: 'context',
name: 'Extra context',
content: data.context,
},
]);
}
textarea.focus();
setTimeout(() => {
textarea.refOnSubmit.current?.();
}, 10); // wait for setExtraContext to finish
}
};
window.addEventListener('message', handleMessage);
return () => window.removeEventListener('message', handleMessage);
}, [textarea]);
}, [textarea, extraContext]);
// Add a keydown listener that sends the "escapePressed" message to the parent window
useEffect(() => {
@@ -52,9 +58,5 @@ export const useVSCodeContext = (textarea: ChatTextareaApi) => {
return () => window.removeEventListener('keydown', handleKeyDown);
}, []);
return {
extraContext,
// call once the user message is sent, to clear the extra context
clearExtraContext: () => setExtraContext(null),
};
return {};
};
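
With this change the hook no longer owns its own extra-context state; the caller passes in the shared `ChatExtraContextApi`, and incoming `setText` messages are appended through `clearItems()`/`addItems()`. A rough sketch of the call site, assuming `useChatTextarea` and `useChatExtraContext` are the hooks behind the two imported APIs (the argument shapes and the real `ChatScreen` wiring may differ):

```tsx
// Sketch: wiring the reworked hook. Hook names follow the imports in the diff above;
// constructor arguments and the surrounding component are illustrative.
import { useChatTextarea } from '../components/useChatTextarea.ts';
import { useChatExtraContext } from '../components/useChatExtraContext.tsx';
import { useVSCodeContext } from './llama-vscode'; // assumed module path

export function ChatScreen() {
  const textarea = useChatTextarea(''); // initial-value argument is an assumption
  const extraContext = useChatExtraContext();

  // setText messages posted by the embedding editor (e.g. llama-vscode) now land
  // in extraContext via clearItems()/addItems() instead of a local useState.
  useVSCodeContext(textarea, extraContext);

  return null; // rendering omitted in this sketch
}
```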


@@ -1,6 +1,6 @@
// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { APIMessage, Message } from './types';
import { APIMessage, Message, LlamaCppServerProps, APIMessageContentPart } from './types';
// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
@@ -57,21 +57,55 @@ export const copyStr = (textToCopy: string) => {
*/
export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
return messages.map((msg) => {
let newContent = '';
if (msg.role !== 'user' || !msg.extra) {
return {
role: msg.role,
content: msg.content,
} as APIMessage;
}
// extra content first, then the user text message at the end
// this allows re-using the same cache prefix for long context
const contentArr: APIMessageContentPart[] = [];
for (const extra of msg.extra ?? []) {
if (extra.type === 'context') {
if (extra.content!='') {
newContent += `${extra.content}\n\n`;
}
contentArr.push({
type: 'text',
text: extra.content,
});
} else if (extra.type === 'textFile') {
contentArr.push({
type: 'text',
text: `File: ${extra.name}\nContent:\n\n${extra.content}`,
});
} else if (extra.type === 'imageFile') {
contentArr.push({
type: 'image_url',
image_url: { url: extra.base64Url },
});
} else if (extra.type === 'audioFile') {
contentArr.push({
type: 'input_audio',
input_audio: {
data: extra.base64Data,
format: /wav/.test(extra.mimeType) ? 'wav' : 'mp3',
},
});
} else {
throw new Error('Unknown extra type');
}
}
newContent += msg.content;
// add user message to the end
contentArr.push({
type: 'text',
text: msg.content,
});
return {
role: msg.role,
content: newContent,
content: contentArr,
};
}) as APIMessage[];
}
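
To make the new wire format concrete: for a user message with an attached image, `normalizeMsgsForAPI` now emits an OpenAI-style content array instead of a flattened string. The values below are illustrative only:

```ts
// Illustrative only — not part of the diff. Extras come first and the typed user
// message last, so a long shared prefix can keep reusing the same cache.
const input = {
  role: 'user',
  content: 'What is in this picture?',
  extra: [
    {
      type: 'imageFile',
      name: 'cat.png',
      base64Url: 'data:image/png;base64,iVBORw0KG...',
    },
  ],
};

// normalizeMsgsForAPI([input]) produces roughly:
const output = {
  role: 'user',
  content: [
    { type: 'image_url', image_url: { url: 'data:image/png;base64,iVBORw0KG...' } },
    { type: 'text', text: 'What is in this picture?' },
  ],
};
```
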
@@ -137,3 +171,25 @@ export const cleanCurrentUrl = (removeQueryParams: string[]) => {
});
window.history.replaceState({}, '', url.toString());
};
export const getServerProps = async (
baseUrl: string,
apiKey?: string
): Promise<LlamaCppServerProps> => {
try {
const response = await fetch(`${baseUrl}/props`, {
headers: {
'Content-Type': 'application/json',
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
},
});
if (!response.ok) {
throw new Error('Failed to fetch server props');
}
const data = await response.json();
return data as LlamaCppServerProps;
} catch (error) {
console.error('Error fetching server props:', error);
throw error;
}
};
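
`getServerProps` is also usable outside the provider, for example as a one-off capability probe. A small sketch, assuming the helper is exported from the webui's misc module and the server listens on the default llama-server port:

```ts
// Sketch: one-off capability probe against /props. The import path and port are
// assumptions; pass the configured API key as the second argument if one is set.
import { getServerProps } from './utils/misc';

const props = await getServerProps('http://localhost:8080');
console.log(`model: ${props.model_path}, n_ctx: ${props.n_ctx}`);
if (!props.modalities?.vision) {
  console.warn('This server reports no vision support; image input will not work.');
}
```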


@@ -48,7 +48,11 @@ export interface Message {
children: Message['id'][];
}
type MessageExtra = MessageExtraTextFile | MessageExtraContext; // TODO: will add more in the future
export type MessageExtra =
| MessageExtraTextFile
| MessageExtraImageFile
| MessageExtraAudioFile
| MessageExtraContext;
export interface MessageExtraTextFile {
type: 'textFile';
@@ -56,12 +60,43 @@ export interface MessageExtraTextFile {
content: string;
}
export interface MessageExtraImageFile {
type: 'imageFile';
name: string;
base64Url: string;
}
export interface MessageExtraAudioFile {
type: 'audioFile';
name: string;
base64Data: string;
mimeType: string;
}
export interface MessageExtraContext {
type: 'context';
name: string;
content: string;
}
export type APIMessage = Pick<Message, 'role' | 'content'>;
export type APIMessageContentPart =
| {
type: 'text';
text: string;
}
| {
type: 'image_url';
image_url: { url: string };
}
| {
type: 'input_audio';
input_audio: { data: string; format: 'wav' | 'mp3' };
};
export type APIMessage = {
role: Message['role'];
content: string | APIMessageContentPart[];
};
export interface Conversation {
id: string; // format: `conv-{timestamp}`
@@ -96,4 +131,15 @@ export interface SettingsPreset {
name: string;
createdAt: number; // timestamp from Date.now()
config: Record<string, string | number | boolean>; // partial CONFIG_DEFAULT
}
// a non-complete list of props, only contains the ones we need
export interface LlamaCppServerProps {
model_path: string;
n_ctx: number;
modalities?: {
vision: boolean;
audio: boolean;
};
// TODO: support params
}
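
The `MessageExtraImageFile` shape stores the image as a base64 data URL, which is what the `image_url` content part in `normalizeMsgsForAPI` forwards to the server. A sketch of producing one from a user-selected file; the actual upload handling lives in `useChatExtraContext`, which is outside this excerpt:

```ts
// Sketch: turning a selected image File into the MessageExtraImageFile shape above.
// FileReader.readAsDataURL yields a "data:image/...;base64,..." string directly.
import { MessageExtraImageFile } from './types';

function readImageAsExtra(file: File): Promise<MessageExtraImageFile> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () =>
      resolve({
        type: 'imageFile',
        name: file.name,
        base64Url: reader.result as string,
      });
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(file);
  });
}
```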