Add vision support in llama-server (#901)

* server: add support for vision model
webui: add support for vision model

* server : remove hack for extra parallel slot #10187

* llama : fix KV shift for qwen2vl #13870

* add no-context-shift parameter

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-11-05 08:43:46 +00:00
committed by GitHub
parent 92607d44c4
commit 7978f04996
26 changed files with 2456 additions and 729 deletions

View File

@@ -1,6 +1,6 @@
import { useEffect, useState } from 'react';
import { MessageExtraContext } from './types';
import { useEffect } from 'react';
import { ChatTextareaApi } from '../components/useChatTextarea.ts';
import { ChatExtraContextApi } from '../components/useChatExtraContext.tsx';
// Extra context when using llama.cpp WebUI from llama-vscode, inside an iframe
// Ref: https://github.com/ggml-org/llama.cpp/pull/11940
@@ -15,11 +15,10 @@ interface SetTextEvData {
* window.postMessage({ command: 'setText', text: 'Spot the syntax error', context: 'def test()\n return 123' }, '*');
*/
export const useVSCodeContext = (textarea: ChatTextareaApi) => {
const [extraContext, setExtraContext] = useState<MessageExtraContext | null>(
null
);
export const useVSCodeContext = (
textarea: ChatTextareaApi,
extraContext: ChatExtraContextApi
) => {
// Accept setText message from a parent window and set inputMsg and extraContext
useEffect(() => {
const handleMessage = (event: MessageEvent) => {
@@ -27,18 +26,25 @@ export const useVSCodeContext = (textarea: ChatTextareaApi) => {
const data: SetTextEvData = event.data;
textarea.setValue(data?.text);
if (data?.context && data.context.length > 0) {
setExtraContext({
type: 'context',
content: data.context,
});
extraContext.clearItems();
extraContext.addItems([
{
type: 'context',
name: 'Extra context',
content: data.context,
},
]);
}
textarea.focus();
setTimeout(() => {
textarea.refOnSubmit.current?.();
}, 10); // wait for setExtraContext to finish
}
};
window.addEventListener('message', handleMessage);
return () => window.removeEventListener('message', handleMessage);
}, [textarea]);
}, [textarea, extraContext]);
// Add a keydown listener that sends the "escapePressed" message to the parent window
useEffect(() => {
@@ -52,9 +58,5 @@ export const useVSCodeContext = (textarea: ChatTextareaApi) => {
return () => window.removeEventListener('keydown', handleKeyDown);
}, []);
return {
extraContext,
// call once the user message is sent, to clear the extra context
clearExtraContext: () => setExtraContext(null),
};
return {};
};