mirror of
https://github.com/turboderp-org/exllamav3.git
synced 2026-05-12 00:41:38 +00:00
17 lines
827 B
JSON
17 lines
827 B
JSON
{
|
|
"model": "/mnt/str/models/qwen3.5-27b/exl3/4.00bpw",
|
|
"stream": true,
|
|
"messages": [
|
|
{
|
|
"role": "system",
|
|
"content": "Given the following conversation, relevant context, and a follow-up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the user's instructions as needed."
|
|
},
|
|
{
|
|
"role": "user",
|
|
"content": "Write a basic CUDA GEMM kernel (using tensor cores) for use in a gated FFN function. It needs to take a fused gate+up tensor and end in an epilog that runs a SiLU activation function on the 'gate' slice of the output and multiplies it elementwise with the 'up' slice."
|
|
}
|
|
],
|
|
"temperature": 0.7,
|
|
"max_tokens": 4096
|
|
}
|