mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-23 07:59:25 +00:00
Add vision support in llama-server (#901)
* server: add support for vision model webui: add support for vision model * server : remove hack for extra parallel slot#10187 * llama : fix KV shift for qwen2vl #13870 * add no-context-shift parameter --------- Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
@@ -0,0 +1,135 @@
|
||||
import {
|
||||
DocumentTextIcon,
|
||||
SpeakerWaveIcon,
|
||||
XMarkIcon,
|
||||
} from '@heroicons/react/24/outline';
|
||||
import { MessageExtra } from '../utils/types';
|
||||
import { useState } from 'react';
|
||||
import { classNames } from '../utils/misc';
|
||||
|
||||
/**
 * Renders a horizontal strip of attachment "chips" (images, audio, text
 * files or pasted context) and, optionally, a modal preview of one of them.
 *
 * @param items       Attachments to render. Nothing is rendered when undefined.
 * @param removeItem  When provided, each chip gets a small "remove" button
 *                    that calls this with the chip's index.
 * @param clickToShow When true, clicking a chip opens a preview dialog.
 */
export default function ChatInputExtraContextItem({
  items,
  removeItem,
  clickToShow,
}: {
  items?: MessageExtra[];
  removeItem?: (index: number) => void;
  clickToShow?: boolean;
}) {
  // Index of the item currently previewed in the dialog; -1 means "closed".
  const [show, setShow] = useState(-1);
  const showingItem = show >= 0 ? items?.[show] : undefined;
  const closePreview = () => setShow(-1);

  if (!items) return null;

  return (
    <div
      className="flex flex-row gap-4 overflow-x-auto py-2 px-1 mb-1"
      role="group"
      aria-description="Selected files"
    >
      {items.map((item, i) => (
        <div
          className="indicator"
          key={i}
          onClick={() => clickToShow && setShow(i)}
          tabIndex={0}
          aria-description={
            clickToShow ? `Click to show: ${item.name}` : undefined
          }
          role={clickToShow ? 'button' : 'menuitem'}
        >
          {removeItem && (
            <div className="indicator-item indicator-top">
              <button
                aria-label="Remove file"
                className="btn btn-neutral btn-sm w-4 h-4 p-0 rounded-full"
                onClick={(e) => {
                  // Do not let the click bubble to the chip, otherwise removing
                  // an item would also open a preview for whatever item ends
                  // up at this index.
                  e.stopPropagation();
                  removeItem(i);
                }}
              >
                <XMarkIcon className="h-3 w-3" />
              </button>
            </div>
          )}

          <div
            className={classNames({
              'flex flex-row rounded-md shadow-sm items-center m-0 p-0': true,
              'cursor-pointer hover:shadow-md': !!clickToShow,
            })}
          >
            {item.type === 'imageFile' ? (
              <>
                <img
                  src={item.base64Url}
                  alt={`Preview image for ${item.name}`}
                  className="w-14 h-14 object-cover rounded-md"
                />
              </>
            ) : (
              <>
                <div
                  className="w-14 h-14 flex items-center justify-center"
                  aria-description="Document icon"
                >
                  {item.type === 'audioFile' ? (
                    <SpeakerWaveIcon className="h-8 w-8 text-gray-500" />
                  ) : (
                    <DocumentTextIcon className="h-8 w-8 text-gray-500" />
                  )}
                </div>

                <div className="text-xs pr-4">
                  <b>{item.name ?? 'Extra content'}</b>
                </div>
              </>
            )}
          </div>
        </div>
      ))}

      {showingItem && (
        <dialog
          className="modal modal-open"
          aria-description={`Preview ${showingItem.name}`}
        >
          <div className="modal-box">
            <div className="flex justify-between items-center mb-4">
              <b>{showingItem.name ?? 'Extra content'}</b>
              {/* The handler lives on the button (not the icon) so that keyboard
                  activation and clicks on the button padding close the dialog. */}
              <button
                className="btn btn-ghost btn-sm"
                aria-label="Close preview dialog"
                onClick={closePreview}
              >
                <XMarkIcon className="h-5 w-5" />
              </button>
            </div>
            {showingItem.type === 'imageFile' ? (
              <img
                src={showingItem.base64Url}
                alt={`Preview image for ${showingItem.name}`}
              />
            ) : showingItem.type === 'audioFile' ? (
              <audio
                controls
                className="w-full"
                aria-description={`Audio file ${showingItem.name}`}
              >
                <source
                  src={`data:${showingItem.mimeType};base64,${showingItem.base64Data}`}
                  type={showingItem.mimeType}
                  aria-description={`Audio file ${showingItem.name}`}
                />
                Your browser does not support the audio element.
              </audio>
            ) : (
              <div className="overflow-x-auto">
                <pre className="whitespace-pre-wrap break-words text-sm">
                  {showingItem.content}
                </pre>
              </div>
            )}
          </div>
          <div className="modal-backdrop" onClick={closePreview}></div>
        </dialog>
      )}
    </div>
  );
}
|
||||
@@ -3,7 +3,8 @@ import { useAppContext } from '../utils/app.context';
|
||||
import { Message, PendingMessage } from '../utils/types';
|
||||
import { classNames } from '../utils/misc';
|
||||
import MarkdownDisplay, { CopyButton } from './MarkdownDisplay';
|
||||
import { ChevronLeftIcon, ChevronRightIcon } from '@heroicons/react/24/outline';
|
||||
import { ChevronLeftIcon, ChevronRightIcon, ArrowPathIcon, PencilSquareIcon } from '@heroicons/react/24/outline';
|
||||
import ChatInputExtraContextItem from './ChatInputExtraContextItem';
|
||||
|
||||
interface SplitMessage {
|
||||
content: PendingMessage['content'];
|
||||
@@ -82,7 +83,11 @@ export default function ChatMessage({
|
||||
if (!viewingChat) return null;
|
||||
|
||||
return (
|
||||
<div className="group" id={id}>
|
||||
<div className="group"
|
||||
id={id}
|
||||
role="group"
|
||||
aria-description={`Message from ${msg.role}`}
|
||||
>
|
||||
<div
|
||||
className={classNames({
|
||||
chat: true,
|
||||
@@ -90,9 +95,13 @@ export default function ChatMessage({
|
||||
'chat-end': msg.role === 'user',
|
||||
})}
|
||||
>
|
||||
{msg.extra && msg.extra.length > 0 && (
|
||||
<ChatInputExtraContextItem items={msg.extra} clickToShow />
|
||||
)}
|
||||
|
||||
<div
|
||||
className={classNames({
|
||||
'chat-bubble markdown': true,
|
||||
'chat-bubble chat-bubble-primary': true,
|
||||
'chat-bubble-base-300': msg.role !== 'user',
|
||||
})}
|
||||
>
|
||||
@@ -168,35 +177,6 @@ export default function ChatMessage({
|
||||
</div>
|
||||
</details>
|
||||
)}
|
||||
|
||||
{msg.extra && msg.extra.length > 0 && (
|
||||
<details
|
||||
className={classNames({
|
||||
'collapse collapse-arrow mb-4 bg-base-200': true,
|
||||
'bg-opacity-10': msg.role !== 'assistant',
|
||||
})}
|
||||
>
|
||||
<summary className="collapse-title">
|
||||
Extra content
|
||||
</summary>
|
||||
<div className="collapse-content">
|
||||
{msg.extra.map(
|
||||
(extra, i) =>
|
||||
extra.type === 'textFile' ? (
|
||||
<div key={extra.name}>
|
||||
<b>{extra.name}</b>
|
||||
<pre>{extra.content}</pre>
|
||||
</div>
|
||||
) : extra.type === 'context' ? (
|
||||
<div key={i}>
|
||||
<pre>{extra.content}</pre>
|
||||
</div>
|
||||
) : null // TODO: support other extra types
|
||||
)}
|
||||
</div>
|
||||
</details>
|
||||
)}
|
||||
|
||||
<MarkdownDisplay
|
||||
content={content}
|
||||
isGenerating={isPending}
|
||||
@@ -273,7 +253,7 @@ export default function ChatMessage({
|
||||
onClick={() => setEditingContent(msg.content)}
|
||||
disabled={msg.content === null}
|
||||
>
|
||||
✍️ Edit
|
||||
<PencilSquareIcon className="h-4 w-4" /> Edit
|
||||
</button>
|
||||
)}
|
||||
{/* assistant message */}
|
||||
@@ -289,7 +269,7 @@ export default function ChatMessage({
|
||||
}}
|
||||
disabled={msg.content === null}
|
||||
>
|
||||
🔄 Regenerate
|
||||
<ArrowPathIcon className="h-4 w-4" /> Regenerate
|
||||
</button>
|
||||
)}
|
||||
{!isPending && (
|
||||
@@ -298,7 +278,7 @@ export default function ChatMessage({
|
||||
onClick={() => setEditingContent(msg.content)}
|
||||
disabled={msg.content === null}
|
||||
>
|
||||
✍️ Edit
|
||||
<PencilSquareIcon className="h-4 w-4" /> Edit
|
||||
</button>
|
||||
)}
|
||||
</>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import { ClipboardEvent, useEffect, useMemo, useState } from 'react';
|
||||
import { CallbackGeneratedChunk, useAppContext } from '../utils/app.context';
|
||||
import ChatMessage from './ChatMessage';
|
||||
import { CanvasType, Message, PendingMessage } from '../utils/types';
|
||||
@@ -7,7 +7,17 @@ import CanvasPyInterpreter from './CanvasPyInterpreter';
|
||||
import StorageUtils from '../utils/storage';
|
||||
import { useVSCodeContext } from '../utils/llama-vscode';
|
||||
import { useChatTextarea, ChatTextareaApi } from './useChatTextarea.ts';
|
||||
|
||||
import {
|
||||
ArrowUpIcon,
|
||||
StopIcon,
|
||||
PaperClipIcon,
|
||||
} from '@heroicons/react/24/solid';
|
||||
import {
|
||||
ChatExtraContextApi,
|
||||
useChatExtraContext,
|
||||
} from './useChatExtraContext.tsx';
|
||||
import Dropzone from 'react-dropzone';
|
||||
import ChatInputExtraContextItem from './ChatInputExtraContextItem.tsx';
|
||||
/**
|
||||
* A message display is a message node with additional information for rendering.
|
||||
* For example, siblings of the message node are stored as their last node (aka leaf node).
|
||||
@@ -104,9 +114,10 @@ export default function ChatScreen() {
|
||||
|
||||
const textarea: ChatTextareaApi = useChatTextarea(prefilledMsg.content());
|
||||
|
||||
const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
|
||||
const extraContext = useChatExtraContext();
|
||||
useVSCodeContext(textarea, extraContext);
|
||||
//const { extraContext, clearExtraContext } = useVSCodeContext(textarea);
|
||||
// TODO: improve this when we have "upload file" feature
|
||||
const currExtra: Message['extra'] = extraContext ? [extraContext] : undefined;
|
||||
|
||||
// keep track of leaf node for rendering
|
||||
const [currNodeId, setCurrNodeId] = useState<number>(-1);
|
||||
@@ -147,7 +158,7 @@ export default function ChatScreen() {
|
||||
currConvId,
|
||||
lastMsgNodeId,
|
||||
lastInpMsg,
|
||||
currExtra,
|
||||
extraContext.items,
|
||||
onChunk
|
||||
))
|
||||
) {
|
||||
@@ -155,7 +166,7 @@ export default function ChatScreen() {
|
||||
textarea.setValue(lastInpMsg);
|
||||
}
|
||||
// OK
|
||||
clearExtraContext();
|
||||
extraContext.clearItems();
|
||||
};
|
||||
|
||||
const handleEditMessage = async (msg: Message, content: string) => {
|
||||
@@ -282,42 +293,14 @@ export default function ChatScreen() {
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* chat input */}
|
||||
<div className="flex flex-row items-end pt-8 pb-6 sticky bottom-0 bg-base-100">
|
||||
<textarea
|
||||
// Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
|
||||
// Large screens (lg:): Disable manual resize, apply max-height for autosize limit
|
||||
className="textarea textarea-bordered w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
|
||||
placeholder="Type a message (Shift+Enter to add a new line)"
|
||||
ref={textarea.ref}
|
||||
onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
|
||||
onKeyDown={(e) => {
|
||||
if (e.nativeEvent.isComposing || e.keyCode === 229) return;
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
sendNewMessage();
|
||||
}
|
||||
}}
|
||||
id="msg-input"
|
||||
dir="auto"
|
||||
// Set a base height of 2 rows for mobile views
|
||||
// On lg+ screens, the hook will calculate and set the initial height anyway
|
||||
rows={2}
|
||||
></textarea>
|
||||
|
||||
{isGenerating(currConvId ?? '') ? (
|
||||
<button
|
||||
className="btn btn-neutral ml-2"
|
||||
onClick={() => stopGenerating(currConvId ?? '')}
|
||||
>
|
||||
Stop
|
||||
</button>
|
||||
) : (
|
||||
<button className="btn btn-primary ml-2" onClick={sendNewMessage}>
|
||||
Send
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{/* chat input */}
|
||||
<ChatInput
|
||||
textarea={textarea}
|
||||
extraContext={extraContext}
|
||||
onSend={sendNewMessage}
|
||||
onStop={() => stopGenerating(currConvId ?? '')}
|
||||
isGenerating={isGenerating(currConvId ?? '')}
|
||||
/>
|
||||
</div>
|
||||
<div className="w-full sticky top-[7em] h-[calc(100vh-9em)]">
|
||||
{canvasData?.type === CanvasType.PY_INTERPRETER && (
|
||||
@@ -327,3 +310,183 @@ export default function ChatScreen() {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// function ServerInfo() {
|
||||
// const { serverProps } = useAppContext();
|
||||
// const modalities = [];
|
||||
// if (serverProps?.modalities?.audio) {
|
||||
// modalities.push('audio');
|
||||
// }
|
||||
// if (serverProps?.modalities?.vision) {
|
||||
// modalities.push('vision');
|
||||
// }
|
||||
// return (
|
||||
// <div
|
||||
// className="card card-sm shadow-sm border-1 border-base-content/20 text-base-content/70 mb-6"
|
||||
// tabIndex={0}
|
||||
// aria-description="Server information"
|
||||
// >
|
||||
// <div className="card-body">
|
||||
// <b>Server Info</b>
|
||||
// <p>
|
||||
// <b>Model</b>: {serverProps?.model_path?.split(/(\\|\/)/).pop()}
|
||||
// <br />
|
||||
// {modalities.length > 0 ? (
|
||||
// <>
|
||||
// <b>Supported modalities:</b> {modalities.join(', ')}
|
||||
// </>
|
||||
// ) : (
|
||||
// ''
|
||||
// )}
|
||||
// </p>
|
||||
// </div>
|
||||
// </div>
|
||||
// );
|
||||
// }
|
||||
|
||||
/**
 * The chat composer: a drop zone wrapping the attachment strip, the message
 * textarea and the upload / send / stop buttons.
 *
 * @param textarea     Textarea API (ref + input handler) from `useChatTextarea`.
 * @param extraContext Attachment state and handlers from `useChatExtraContext`.
 * @param onSend       Called when the user presses Enter (without Shift) or
 *                     clicks the send button.
 * @param onStop       Called when the user clicks the stop button.
 * @param isGenerating Whether a response is being generated; swaps Send for
 *                     Stop, hides the attachment strip and disables upload.
 */
function ChatInput({
  textarea,
  extraContext,
  onSend,
  onStop,
  isGenerating,
}: {
  textarea: ChatTextareaApi;
  extraContext: ChatExtraContextApi;
  onSend: () => void;
  onStop: () => void;
  isGenerating: boolean;
}) {
  const { config } = useAppContext();
  // True while a drag is hovering over the drop zone.
  const [isDrag, setIsDrag] = useState(false);

  return (
    <div
      role="group"
      aria-label="Chat input"
      className={classNames({
        'flex items-end pt-8 pb-6 sticky bottom-0 bg-base-100': true,
        'opacity-50': isDrag, // simply visual feedback to inform user that the file will be accepted
      })}
    >
      <Dropzone
        noClick
        onDrop={(files: File[]) => {
          setIsDrag(false);
          extraContext.onFileAdded(files);
        }}
        onDragEnter={() => setIsDrag(true)}
        onDragLeave={() => setIsDrag(false)}
        multiple={true}
      >
        {({ getRootProps, getInputProps }) => (
          <div
            className="flex flex-col rounded-xl border-1 border-base-content/30 p-3 w-full"
            // when a file is pasted to the input, we handle it here
            // if a text is pasted, and if it is long text, we will convert it to a file
            onPasteCapture={(e: ClipboardEvent<HTMLDivElement>) => {
              const text = e.clipboardData.getData('text/plain');
              if (
                text.length > 0 &&
                config.pasteLongTextToFileLen > 0 &&
                text.length > config.pasteLongTextToFileLen
              ) {
                // if the text is too long, we will convert it to a file
                extraContext.addItems([
                  {
                    type: 'context',
                    name: 'Pasted Content',
                    content: text,
                  },
                ]);
                e.preventDefault();
                return;
              }

              // if a file is pasted, we will handle it here
              const files = Array.from(e.clipboardData.items)
                .filter((item) => item.kind === 'file')
                .map((item) => item.getAsFile())
                // explicit type predicate so the result is File[] on every
                // TypeScript version, not only those that infer it
                .filter((file): file is File => file !== null);

              if (files.length > 0) {
                e.preventDefault();
                extraContext.onFileAdded(files);
              }
            }}
            {...getRootProps()}
          >
            {!isGenerating && (
              <ChatInputExtraContextItem
                items={extraContext.items}
                removeItem={extraContext.removeItem}
              />
            )}

            <div className="flex flex-row w-full">
              <textarea
                // Default (mobile): Enable vertical resize, overflow auto for scrolling if needed
                // Large screens (lg:): Disable manual resize, apply max-height for autosize limit
                className="text-md outline-none border-none w-full resize-vertical lg:resize-none lg:max-h-48 lg:overflow-y-auto" // Adjust lg:max-h-48 as needed (e.g., lg:max-h-60)
                placeholder="Type a message..."
                ref={textarea.ref}
                onInput={textarea.onInput} // Hook's input handler (will only resize height on lg+ screens)
                onKeyDown={(e) => {
                  // ignore key presses that belong to an IME composition
                  if (e.nativeEvent.isComposing || e.keyCode === 229) return;
                  if (e.key === 'Enter' && !e.shiftKey) {
                    e.preventDefault();
                    onSend();
                  }
                }}
                id="msg-input"
                dir="auto"
                // Set a base height of 2 rows for mobile views
                // On lg+ screens, the hook will calculate and set the initial height anyway
                rows={2}
              ></textarea>

              {/* buttons area */}
              <div className="flex flex-row gap-2 ml-2">
                <label
                  htmlFor="file-upload"
                  className={classNames({
                    'btn w-8 h-8 p-0 rounded-full': true,
                    'btn-disabled': isGenerating,
                  })}
                  aria-label="Upload file"
                  tabIndex={0}
                  role="button"
                >
                  <PaperClipIcon className="h-5 w-5" />
                </label>
                <input
                  id="file-upload"
                  type="file"
                  disabled={isGenerating}
                  {...getInputProps()}
                  hidden
                />
                {isGenerating ? (
                  <button
                    className="btn btn-neutral w-8 h-8 p-0 rounded-full"
                    onClick={onStop}
                    aria-label="Stop generating"
                  >
                    <StopIcon className="h-5 w-5" />
                  </button>
                ) : (
                  <button
                    className="btn btn-primary w-8 h-8 p-0 rounded-full"
                    onClick={onSend}
                    aria-label="Send message"
                  >
                    <ArrowUpIcon className="h-5 w-5" />
                  </button>
                )}
              </div>
            </div>
          </div>
        )}
      </Dropzone>
    </div>
  );
}
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
ArrowDownTrayIcon,
|
||||
PencilIcon,
|
||||
TrashIcon,
|
||||
MoonIcon,
|
||||
} from '@heroicons/react/24/outline';
|
||||
|
||||
export default function Header() {
|
||||
@@ -204,16 +205,7 @@ export default function Header() {
|
||||
<div className="tooltip tooltip-bottom" data-tip="Themes">
|
||||
<div className="dropdown dropdown-end dropdown-bottom">
|
||||
<div tabIndex={0} role="button" className="btn m-1">
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="16"
|
||||
height="16"
|
||||
fill="currentColor"
|
||||
className="bi bi-palette2"
|
||||
viewBox="0 0 16 16"
|
||||
>
|
||||
<path d="M0 .5A.5.5 0 0 1 .5 0h5a.5.5 0 0 1 .5.5v5.277l4.147-4.131a.5.5 0 0 1 .707 0l3.535 3.536a.5.5 0 0 1 0 .708L10.261 10H15.5a.5.5 0 0 1 .5.5v5a.5.5 0 0 1-.5.5H3a3 3 0 0 1-2.121-.879A3 3 0 0 1 0 13.044m6-.21 7.328-7.3-2.829-2.828L6 7.188zM4.5 13a1.5 1.5 0 1 0-3 0 1.5 1.5 0 0 0 3 0M15 15v-4H9.258l-4.015 4zM0 .5v12.495zm0 12.495V13z" />
|
||||
</svg>
|
||||
<MoonIcon className="w-5 h-5" />
|
||||
</div>
|
||||
<ul
|
||||
tabIndex={0}
|
||||
|
||||
@@ -11,6 +11,7 @@ import { ElementContent, Root } from 'hast';
|
||||
import { visit } from 'unist-util-visit';
|
||||
import { useAppContext } from '../utils/app.context';
|
||||
import { CanvasType } from '../utils/types';
|
||||
import { DocumentDuplicateIcon, PlayIcon } from '@heroicons/react/24/outline';
|
||||
|
||||
export default function MarkdownDisplay({
|
||||
content,
|
||||
@@ -109,7 +110,8 @@ export const CopyButton = ({
|
||||
}}
|
||||
onMouseLeave={() => setCopied(false)}
|
||||
>
|
||||
{copied ? 'Copied!' : '📋 Copy'}
|
||||
<DocumentDuplicateIcon className="h-4 w-4" />
|
||||
{copied ? 'Copied!' : 'Copy'}
|
||||
</button>
|
||||
);
|
||||
};
|
||||
@@ -133,7 +135,8 @@ export const RunPyCodeButton = ({
|
||||
})
|
||||
}
|
||||
>
|
||||
▶️ Run
|
||||
<PlayIcon className="h-4 w-4" />
|
||||
{"Run"}
|
||||
</button>
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -275,6 +275,16 @@ const SETTING_SECTIONS = (
|
||||
key,
|
||||
}) as SettingFieldInput
|
||||
),
|
||||
{
|
||||
type: SettingInputType.SHORT_INPUT,
|
||||
label: 'Paste length to file',
|
||||
key: 'pasteLongTextToFileLen',
|
||||
},
|
||||
{
|
||||
type: SettingInputType.CHECKBOX,
|
||||
label: 'Parse PDF as image instead of text',
|
||||
key: 'pdfAsImage',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
371
examples/server/webui/src/components/useChatExtraContext.tsx
Normal file
371
examples/server/webui/src/components/useChatExtraContext.tsx
Normal file
@@ -0,0 +1,371 @@
|
||||
import { useState } from 'react';
|
||||
import { MessageExtra } from '../utils/types';
|
||||
import toast from 'react-hot-toast';
|
||||
import { useAppContext } from '../utils/app.context';
|
||||
import * as pdfjs from 'pdfjs-dist';
|
||||
import pdfjsWorkerSrc from 'pdfjs-dist/build/pdf.worker.min.mjs?url';
|
||||
import { TextContent, TextItem } from 'pdfjs-dist/types/src/display/api';
|
||||
|
||||
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorkerSrc;
|
||||
|
||||
// This file handles uploading extra context items (a.k.a files)
|
||||
// It allows processing these kinds of files:
|
||||
// - image files (converted to base64)
|
||||
// - audio files (converted to base64)
|
||||
// - text files (including code files)
|
||||
// - pdf (converted to text)
|
||||
|
||||
/**
 * API returned by `useChatExtraContext`: the current list of attachments
 * plus the operations that modify it.
 */
export interface ChatExtraContextApi {
  /** Current attachments; undefined if empty, similar to Message['extra']. */
  items?: MessageExtra[];
  /** Handles files chosen by the user (used by the "upload" button). */
  onFileAdded: (files: File[]) => void;
  /** Appends the given attachments to the list. */
  addItems: (items: MessageExtra[]) => void;
  /** Removes the attachment at position `idx`. */
  removeItem: (idx: number) => void;
  /** Removes every attachment. */
  clearItems: () => void;
}
|
||||
|
||||
/**
 * React hook that owns the list of extra context items (attachments) for the
 * message being composed and converts user-supplied files into those items.
 *
 * Supported inputs: images (SVG is rasterised to PNG), mp3/wav audio, PDF
 * (as page images or as extracted text, depending on `config.pdfAsImage` and
 * whether the server reports vision support) and any non-binary text file.
 *
 * @returns The current items (undefined when the list is empty) together
 *          with the add / remove / clear / file-handling callbacks.
 */
export function useChatExtraContext(): ChatExtraContextApi {
  const { serverProps, config } = useAppContext();
  const [items, setItems] = useState<MessageExtra[]>([]);

  const addItems = (newItems: MessageExtra[]) => {
    setItems((prev) => [...prev, ...newItems]);
  };

  const removeItem = (idx: number) => {
    setItems((prev) => prev.filter((_, i) => i !== idx));
  };

  const clearItems = () => {
    setItems([]);
  };

  const isSupportVision = serverProps?.modalities?.vision;

  // Processes files one after another. A file that is rejected with an error
  // toast (too large, unsupported type) stops the rest of the batch; files
  // that are handled successfully never do.
  const onFileAdded = async (files: File[]) => {
    try {
      for (const file of files) {
        const mimeType = file.type;

        // this limit is only to prevent accidental uploads of huge files
        // it can potentially crash the browser because we read the file as base64
        if (file.size > 500 * 1024 * 1024) {
          toast.error('File is too large. Maximum size is 500MB.');
          break;
        }

        if (mimeType.startsWith('image/')) {
          if (!isSupportVision) {
            toast.error('Multimodal is not supported by this server or model.');
            break;
          }

          let base64Url = await getFileAsBase64(file);
          if (mimeType === 'image/svg+xml') {
            // Convert SVG to PNG
            base64Url = await svgBase64UrlToPngDataURL(base64Url);
          }
          addItems([
            {
              type: 'imageFile',
              name: file.name,
              base64Url,
            },
          ]);
        } else if (mimeType.startsWith('video/')) {
          toast.error('Video files are not supported yet.');
          break;
        } else if (mimeType.startsWith('audio/')) {
          if (!/mpeg|wav/.test(mimeType)) {
            toast.error('Only mp3 and wav audio files are supported.');
            break;
          }

          // plain base64, not a data URL
          const base64Data = await getFileAsBase64(file, false);
          addItems([
            {
              type: 'audioFile',
              name: file.name,
              mimeType,
              base64Data,
            },
          ]);
        } else if (mimeType.startsWith('application/pdf')) {
          if (config.pdfAsImage && !isSupportVision) {
            // Informational only: we fall through to the text conversion
            // below, which is exactly what this message promises.
            toast(
              'Multimodal is not supported, PDF will be converted to text instead of image.'
            );
          }

          if (config.pdfAsImage && isSupportVision) {
            // Convert PDF to images
            const base64Urls = await convertPDFToImage(file);
            addItems(
              base64Urls.map((base64Url) => ({
                type: 'imageFile',
                name: file.name,
                base64Url,
              }))
            );
          } else {
            // Convert PDF to text
            const content = await convertPDFToText(file);
            addItems([
              {
                type: 'textFile',
                name: file.name,
                content,
              },
            ]);
            if (isSupportVision) {
              toast.success(
                'PDF file converted to text. You can also convert it to image, see in Settings.'
              );
            }
          }
          // no `break` here: the remaining files of the batch are still processed
        } else {
          // Because there can be many text file types (like code file), we will not check the mime type
          // and will just check if the file is not binary.
          // The read is awaited so that failures reach the catch block below
          // and items are added in the order the files were supplied.
          const content = await new Promise<string>((resolve, reject) => {
            const reader = new FileReader();
            reader.onload = () =>
              resolve(typeof reader.result === 'string' ? reader.result : '');
            reader.onerror = () =>
              reject(reader.error ?? new Error('Failed to read file.'));
            reader.readAsText(file);
          });

          // an empty file yields nothing to attach
          if (!content) continue;

          if (!isLikelyNotBinary(content)) {
            toast.error('File is binary. Please upload a text file.');
            continue;
          }
          addItems([
            {
              type: 'textFile',
              name: file.name,
              content,
            },
          ]);
        }
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const errorMessage = `Error processing file: ${message}`;
      toast.error(errorMessage);
    }
  };

  return {
    items: items.length > 0 ? items : undefined,
    addItems,
    removeItem,
    clearItems,
    onFileAdded,
  };
}
|
||||
|
||||
/**
 * Reads a file and returns its content base64-encoded.
 *
 * @param file      The file to read.
 * @param outputUrl When true (default) the full data URL
 *                  (`data:<mime>;base64,<payload>`) is returned; when false
 *                  only the part after the first comma is returned.
 * @returns A promise resolving to the data URL or the bare base64 payload.
 *          It rejects when the read produces no result, fails or is aborted.
 */
async function getFileAsBase64(file: File, outputUrl = true): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (event) => {
      const result = event.target?.result;
      if (typeof result !== 'string' || result.length === 0) {
        reject(new Error('Failed to read file.'));
        return;
      }
      // strip the "data:<mime>;base64," prefix when only the payload is wanted
      resolve(outputUrl ? result : result.substring(result.indexOf(',') + 1));
    };
    // Without these handlers a failed or aborted read would leave the promise
    // pending forever and the caller would hang silently.
    reader.onerror = () =>
      reject(reader.error ?? new Error('Failed to read file.'));
    reader.onabort = () => reject(new Error('File reading was aborted.'));
    reader.readAsDataURL(file);
  });
}
|
||||
|
||||
/**
 * Reads a file into an ArrayBuffer.
 *
 * @param file The file to read.
 * @returns A promise resolving to the file content. It rejects when the read
 *          produces no result, fails or is aborted.
 */
async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (event) => {
      const result = event.target?.result;
      if (result instanceof ArrayBuffer) {
        resolve(result);
      } else {
        reject(new Error('Failed to read file.'));
      }
    };
    // Without these handlers a failed or aborted read would leave the promise
    // pending forever and the caller would hang silently.
    reader.onerror = () =>
      reject(reader.error ?? new Error('Failed to read file.'));
    reader.onabort = () => reject(new Error('File reading was aborted.'));
    reader.readAsArrayBuffer(file);
  });
}
|
||||
|
||||
/**
 * Extracts the text of a PDF file.
 *
 * Text content is requested for all pages concurrently; every text item of
 * every page (in page order) becomes one line of the result.
 *
 * @param file The PDF file.
 * @returns All text items joined with newlines.
 */
async function convertPDFToText(file: File): Promise<string> {
  const data = await getFileAsBuffer(file);
  const doc = await pdfjs.getDocument(data).promise;

  // pdf.js page numbers are 1-based
  const pageNumbers = Array.from({ length: doc.numPages }, (_, idx) => idx + 1);
  const perPage: TextContent[] = await Promise.all(
    pageNumbers.map((pageNo) =>
      doc.getPage(pageNo).then((page) => page.getTextContent())
    )
  );

  const lines: string[] = [];
  for (const content of perPage) {
    for (const entry of content.items) {
      // entries without a `str` field contribute an empty line
      lines.push((entry as TextItem).str ?? '');
    }
  }
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Renders every page of a PDF file onto an off-screen canvas.
 *
 * Pages are fetched one after another; each render task is started without
 * waiting for the previous one to finish and all of them are awaited at the end.
 *
 * @param file The PDF file.
 * @returns One data URL (from `canvas.toDataURL()`) per page, in page order.
 * @throws Error when a 2D canvas context cannot be obtained.
 */
async function convertPDFToImage(file: File): Promise<string[]> {
  const data = await getFileAsBuffer(file);
  const pdf = await pdfjs.getDocument(data).promise;
  const rendered: Promise<string>[] = [];

  for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
    const page = await pdf.getPage(pageNo);
    const viewport = page.getViewport({ scale: 1.5 });

    const canvas = document.createElement('canvas');
    const canvasContext = canvas.getContext('2d');
    if (!canvasContext) {
      throw new Error('Failed to get 2D context from canvas');
    }
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    const renderTask = page.render({ canvasContext, viewport });
    rendered.push(renderTask.promise.then(() => canvas.toDataURL()));
  }

  return await Promise.all(rendered);
}
|
||||
|
||||
// WARN: vibe code below
|
||||
// This code is a heuristic to determine if a string is likely not binary.
|
||||
// It is necessary because input file can have various mime types which we don't have time to investigate.
|
||||
// For example, a python file can be text/plain, application/x-python, etc.
|
||||
/**
 * Heuristically decides whether a decoded string looks like text rather
 * than binary data.
 *
 * Only the first 10 KiB (in UTF-16 code units) are inspected. A code unit is
 * "suspicious" when it is the Unicode replacement character (U+FFFD), a NUL,
 * or a C0 control character other than BEL, BS, TAB, LF and CR.
 *
 * @param str The decoded file content.
 * @returns false when the sample contains more than 2 NULs or when more than
 *          15% of it is suspicious; true otherwise (including for an empty
 *          string).
 */
function isLikelyNotBinary(str: string): boolean {
  const PREFIX_LENGTH = 1024 * 10; // inspect the first 10KB of the string
  const SUSPICIOUS_RATIO_LIMIT = 0.15; // allow up to 15% suspicious chars
  const MAX_NULL_BYTES = 2;

  // An empty string is trivially text. This guard also guarantees that the
  // sample length below is > 0, so no separate zero-length check is needed.
  if (!str) {
    return true;
  }

  const sampleLength = Math.min(str.length, PREFIX_LENGTH);
  let suspiciousCount = 0;
  let nullCount = 0;

  for (let i = 0; i < sampleLength; i++) {
    const code = str.charCodeAt(i);

    if (code === 0x0000) {
      // NULs are tracked separately and also count as suspicious
      nullCount++;
      suspiciousCount++;
    } else if (code === 0xfffd) {
      // U+FFFD: strong hint that bytes could not be decoded as text
      suspiciousCount++;
    } else if (code < 32) {
      // C0 controls, tolerating BEL (7), BS (8), TAB (9), LF (10), CR (13)
      const tolerated =
        code === 7 || code === 8 || code === 9 || code === 10 || code === 13;
      if (!tolerated) {
        suspiciousCount++;
      }
    }
    // Everything from 32 upwards (including DEL and non-ASCII) is accepted.
  }

  // Too many NULs is a strong binary indicator regardless of the ratio.
  if (nullCount > MAX_NULL_BYTES) {
    return false;
  }

  return suspiciousCount / sampleLength <= SUSPICIOUS_RATIO_LIMIT;
}
|
||||
|
||||
// WARN: vibe code below
|
||||
// Converts a Base64URL encoded SVG string to a PNG Data URL using browser Canvas API.
|
||||
/**
 * Rasterises an SVG (given as a data URL) to a PNG data URL using an
 * off-screen canvas with a white background.
 *
 * @param base64UrlSvg Data URL of the SVG image.
 * @returns A promise resolving to a `data:image/png` URL. It rejects when the
 *          image fails to load or a 2D canvas context is unavailable.
 */
function svgBase64UrlToPngDataURL(base64UrlSvg: string): Promise<string> {
  const PNG_BACKGROUND = 'white'; // PNG background colour

  return new Promise((resolve, reject) => {
    // Draws the loaded image onto a fresh canvas and settles the promise.
    const rasterise = (image: HTMLImageElement) => {
      const canvas = document.createElement('canvas');
      const context = canvas.getContext('2d');
      if (!context) {
        reject(new Error('Failed to get 2D canvas context.'));
        return;
      }

      // Fall back to 300x300 when the image reports no natural dimensions.
      const width = image.naturalWidth || 300;
      const height = image.naturalHeight || 300;
      canvas.width = width;
      canvas.height = height;

      context.fillStyle = PNG_BACKGROUND;
      context.fillRect(0, 0, canvas.width, canvas.height);
      context.drawImage(image, 0, 0, width, height);

      resolve(canvas.toDataURL('image/png'));
    };

    try {
      const image = new Image();
      image.onload = () => rasterise(image);
      image.onerror = () => {
        reject(
          new Error('Failed to load SVG image. Ensure the SVG data is valid.')
        );
      };
      // Assigning the source starts loading the SVG into the image element.
      image.src = base64UrlSvg;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const errorMessage = `Error converting SVG to PNG: ${message}`;
      toast.error(errorMessage);
      reject(new Error(errorMessage));
    }
  });
}
|
||||
@@ -37,6 +37,7 @@ export interface ChatTextareaApi {
|
||||
setValue: (value: string) => void;
|
||||
focus: () => void;
|
||||
ref: React.RefObject<HTMLTextAreaElement>;
|
||||
refOnSubmit: React.MutableRefObject<(() => void) | null>; // Submit handler
|
||||
onInput: (event: React.FormEvent<HTMLTextAreaElement>) => void; // Input handler
|
||||
}
|
||||
|
||||
@@ -46,7 +47,7 @@ export interface ChatTextareaApi {
|
||||
export function useChatTextarea(initValue: string): ChatTextareaApi {
|
||||
const [savedInitValue, setSavedInitValue] = useState<string>(initValue);
|
||||
const textareaRef = useRef<HTMLTextAreaElement>(null);
|
||||
|
||||
const onSubmitRef = useRef<(() => void) | null>(null);
|
||||
// Effect to set initial value and height on mount or when initValue changes
|
||||
useEffect(() => {
|
||||
const textarea = textareaRef.current;
|
||||
@@ -91,6 +92,7 @@ export function useChatTextarea(initValue: string): ChatTextareaApi {
|
||||
}
|
||||
},
|
||||
ref: textareaRef,
|
||||
refOnSubmit: onSubmitRef,
|
||||
onInput: handleInput,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user