Add vision support in llama-server (#901)

* server: add support for vision model
webui: add support for vision model

* server : remove hack for extra parallel slot#10187

* llama : fix KV shift for qwen2vl #13870

* add no-context-shift parameter

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-11-05 08:43:46 +00:00
committed by GitHub
parent 92607d44c4
commit 7978f04996
26 changed files with 2456 additions and 729 deletions

View File

@@ -1,4 +1,4 @@
import { useEffect, useMemo, useState } from 'react';
import { ClipboardEvent, useEffect, useMemo, useState } from 'react';
import { CallbackGeneratedChunk, useAppContext } from '../utils/app.context';
import ChatMessage from './ChatMessage';
import { CanvasType, Message, PendingMessage } from '../utils/types';
@@ -7,7 +7,17 @@ import CanvasPyInterpreter from './CanvasPyInterpreter';
import StorageUtils from '../utils/storage';
import { useVSCodeContext } from '../utils/llama-vscode';
import { useChatTextarea, ChatTextareaApi } from './useChatTextarea.ts';
import {
ArrowUpIcon,
StopIcon,
PaperClipIcon,
} from '@heroicons/react/24/solid';
import {
ChatExtraContextApi,
useChatExtraContext,
} from './useChatExtraContext.tsx';
import Dropzone from 'react-dropzone';
import ChatInputExtraContextItem from './ChatInputExtraContextItem.tsx';
/**
* A message display is a message node with additional information for rendering.
* For example, siblings of the message node are stored as their last node (aka leaf node).
@@ -104,9 +114,10 @@ export default function ChatScreen() {
const textarea: ChatTextareaApi = useChatTextarea(prefilledMsg.content());
const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
const extraContext = useChatExtraContext();
useVSCodeContext(textarea, extraContext);
//const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
// TODO: improve this when we have "upload file" feature
const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined;
// keep track of leaf node for rendering
const [currNodeId, setCurrNodeId] = useState<number>(-1);
@@ -147,7 +158,7 @@ export default function ChatScreen() {
currConvId,
lastMsgNodeId,
lastInpMsg,
currExtra,
extraContext.items,
onChunk
))
) {
@@ -155,7 +166,7 @@ export default function ChatScreen() {
textarea.setValue(lastInpMsg);
}
// OK
clearExtraContext();
extraContext.clearItems();
};
const handleEditMessage = async (msg: Message, content: string) => {
@@ -282,42 +293,14 @@ export default function ChatScreen() {
})}
</div>
{/* chat input */}
<div className="flex flex-row items-end pt-8 pb-6 sticky bottom-0 bg-base-100">
<textarea
// Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
// Large screens (lg:): Disable manual resize, apply max-height for autosize limit
className="textarea textarea-bordered w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
placeholder="Type a message (Shift+Enter to add a new line)"
ref={textarea.ref}
onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
onKeyDown={(e) => {
if (e.nativeEvent.isComposing || e.keyCode === 229) return;
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
sendNewMessage();
}
}}
id="msg-input"
dir="auto"
// Set a base height of 2 rows for mobile views
// On lg+ screens, the hook will calculate and set the initial height anyway
rows={2}
></textarea>
{isGenerating(currConvId ?? '') ? (
<button
className="btn btn-neutral ml-2"
onClick={() => stopGenerating(currConvId ?? '')}
>
Stop
</button>
) : (
<button className="btn btn-primary ml-2" onClick={sendNewMessage}>
Send
</button>
)}
</div>
{/* chat input */}
<ChatInput
textarea={textarea}
extraContext={extraContext}
onSend={sendNewMessage}
onStop={() => stopGenerating(currConvId ?? '')}
isGenerating={isGenerating(currConvId ?? '')}
/>
</div>
<div className="w-full sticky top-[7em] h-[calc(100vh-9em)]">
{canvasData?.type === CanvasType.PY_INTERPRETER && (
@@ -327,3 +310,183 @@ export default function ChatScreen() {
</div>
);
}
// function ServerInfo() {
// const { serverProps } = useAppContext();
// const modalities = [];
// if (serverProps?.modalities?.audio) {
// modalities.push('audio');
// }
// if (serverProps?.modalities?.vision) {
// modalities.push('vision');
// }
// return (
// <div
// className="card card-sm shadow-sm border-1 border-base-content/20 text-base-content/70 mb-6"
// tabIndex={0}
// aria-description="Server information"
// >
// <div className="card-body">
// <b>Server Info</b>
// <p>
// <b>Model</b>: {serverProps?.model_path?.split(/(\\|\/)/).pop()}
// <br />
// {modalities.length > 0 ? (
// <>
// <b>Supported modalities:</b> {modalities.join(', ')}
// </>
// ) : (
// ''
// )}
// </p>
// </div>
// </div>
// );
// }
function ChatInput({
textarea,
extraContext,
onSend,
onStop,
isGenerating,
}: {
textarea: ChatTextareaApi;
extraContext: ChatExtraContextApi;
onSend: () => void;
onStop: () => void;
isGenerating: boolean;
}) {
const { config } = useAppContext();
const [isDrag, setIsDrag] = useState(false);
return (
<div
role="group"
aria-label="Chat input"
className={classNames({
'flex items-end pt-8 pb-6 sticky bottom-0 bg-base-100': true,
'opacity-50': isDrag, // simply visual feedback to inform user that the file will be accepted
})}
>
<Dropzone
noClick
onDrop={(files: File[]) => {
setIsDrag(false);
extraContext.onFileAdded(files);
}}
onDragEnter={() => setIsDrag(true)}
onDragLeave={() => setIsDrag(false)}
multiple={true}
>
{({ getRootProps, getInputProps }) => (
<div
className="flex flex-col rounded-xl border-1 border-base-content/30 p-3 w-full"
// when a file is pasted to the input, we handle it here
// if a text is pasted, and if it is long text, we will convert it to a file
onPasteCapture={(e: ClipboardEvent<HTMLInputElement>) => {
const text = e.clipboardData.getData('text/plain');
if (
text.length > 0 &&
config.pasteLongTextToFileLen > 0 &&
text.length > config.pasteLongTextToFileLen
) {
// if the text is too long, we will convert it to a file
extraContext.addItems([
{
type: 'context',
name: 'Pasted Content',
content: text,
},
]);
e.preventDefault();
return;
}
// if a file is pasted, we will handle it here
const files = Array.from(e.clipboardData.items)
.filter((item) => item.kind === 'file')
.map((item) => item.getAsFile())
.filter((file) => file !== null);
if (files.length > 0) {
e.preventDefault();
extraContext.onFileAdded(files);
}
}}
{...getRootProps()}
>
{!isGenerating && (
<ChatInputExtraContextItem
items={extraContext.items}
removeItem={extraContext.removeItem}
/>
)}
<div className="flex flex-row w-full">
<textarea
// Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
// Large screens (lg:): Disable manual resize, apply max-height for autosize limit
className="text-md outline-none border-none w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
placeholder="Type a message..."
ref={textarea.ref}
onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
onKeyDown={(e) => {
if (e.nativeEvent.isComposing || e.keyCode === 229) return;
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
onSend();
}
}}
id="msg-input"
dir="auto"
// Set a base height of 2 rows for mobile views
// On lg+ screens, the hook will calculate and set the initial height anyway
rows={2}
></textarea>
{/* buttons area */}
<div className="flex flex-row gap-2 ml-2">
<label
htmlFor="file-upload"
className={classNames({
'btn w-8 h-8 p-0 rounded-full': true,
'btn-disabled': isGenerating,
})}
aria-label="Upload file"
tabIndex={0}
role="button"
>
<PaperClipIcon className="h-5 w-5" />
</label>
<input
id="file-upload"
type="file"
disabled={isGenerating}
{...getInputProps()}
hidden
/>
{isGenerating ? (
<button
className="btn btn-neutral w-8 h-8 p-0 rounded-full"
onClick={onStop}
>
<StopIcon className="h-5 w-5" />
</button>
) : (
<button
className="btn btn-primary w-8 h-8 p-0 rounded-full"
onClick={onSend}
aria-label="Send message"
>
<ArrowUpIcon className="h-5 w-5" />
</button>
)}
</div>
</div>
</div>
)}
</Dropzone>
</div>
);
}