mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-11 00:20:19 +00:00
Fix Qwen3 content extraction breaking code formatting (#661)
Problem:
- qwen3::extract_content_during_parsing() used an aggressive regex to collapse multiple newlines
- This broke proper code formatting (e.g., PEP 8's 2 empty lines between functions)
- Affected non-tool-call streaming output, where formatting is critical

Solution:
- Replace aggressive std::regex_replace(R"(\n\s*\n)", "\n") with gentle string_strip()
- Follow original llama.cpp patterns: only trim leading/trailing whitespace
- Preserve internal formatting, including multiple newlines
- Add the include for common.h to access the string_strip function

Changes:
- examples/server/parsers/qwen3_parser.hpp: Replace whitespace cleanup with string_strip()
- tests/test-function-calls.cpp: Add test_qwen3_whitespace_preservation() to prevent regression

Testing:
- ✅ PEP 8 compliance: 2 empty lines between functions preserved
- ✅ Tool call parsing: All Qwen3 tests continue to pass
- ✅ No regressions: Existing functionality maintained
- ✅ Follows original llama.cpp whitespace handling patterns
This commit is contained in:
committed by
GitHub
parent
f4051d9c3e
commit
05a61510b9
@@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "json.hpp"
|
#include "json.hpp"
|
||||||
|
#include "../../common/common.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
|
|
||||||
@@ -102,12 +103,8 @@ static std::string extract_content_during_parsing(const std::string& text, bool
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up extra whitespace
|
// Only trim leading/trailing whitespace, preserve internal formatting
|
||||||
content = std::regex_replace(content, std::regex(R"(\n\s*\n)"), "\n");
|
content = string_strip(content);
|
||||||
|
|
||||||
// Trim leading/trailing whitespace
|
|
||||||
content.erase(0, content.find_first_not_of(" \t\n\r"));
|
|
||||||
content.erase(content.find_last_not_of(" \t\n\r") + 1);
|
|
||||||
|
|
||||||
} catch (const std::exception&) {
|
} catch (const std::exception&) {
|
||||||
// Return original text on regex errors
|
// Return original text on regex errors
|
||||||
|
|||||||
@@ -2237,6 +2237,40 @@ void test_xml_tool_call_parsing() {
|
|||||||
std::cout << " ✅ XML tool call parsing works correctly!" << std::endl;
|
std::cout << " ✅ XML tool call parsing works correctly!" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test whitespace preservation in qwen3 content extraction
|
||||||
|
void test_qwen3_whitespace_preservation() {
|
||||||
|
std::cout << "\n🧹 Testing Qwen3 Whitespace Preservation Fix:" << std::endl;
|
||||||
|
|
||||||
|
// Test case with PEP 8 style: 2 empty lines between functions
|
||||||
|
const std::string pep8_content = R"(def celsius_to_fahrenheit(celsius):
|
||||||
|
return celsius * 9/5 + 32
|
||||||
|
|
||||||
|
|
||||||
|
def fahrenheit_to_celsius(fahrenheit):
|
||||||
|
return (fahrenheit - 32) * 5/9)";
|
||||||
|
|
||||||
|
std::cout << "🎯 Testing PEP 8 compliance (2 empty lines between functions)..." << std::endl;
|
||||||
|
std::cout << "Original content has: 2 empty lines between functions" << std::endl;
|
||||||
|
|
||||||
|
// Test the qwen3 content extraction directly
|
||||||
|
std::string result = qwen3::extract_content_during_parsing(pep8_content, false);
|
||||||
|
|
||||||
|
// Check if the double newlines are preserved (should have \n\n\n for 2 empty lines)
|
||||||
|
bool has_double_empty_lines = result.find("\n\n\n") != std::string::npos;
|
||||||
|
|
||||||
|
std::cout << "Result content: '" << result << "'" << std::endl;
|
||||||
|
std::cout << "Has 2 empty lines preserved: " << (has_double_empty_lines ? "YES" : "NO") << std::endl;
|
||||||
|
|
||||||
|
test_assert(has_double_empty_lines, "Qwen3: PEP 8 double empty lines preserved");
|
||||||
|
|
||||||
|
// Additional test: ensure no excessive trimming
|
||||||
|
test_assert(!result.empty(), "Qwen3: Content not empty after processing");
|
||||||
|
test_assert(result.find("celsius_to_fahrenheit") != std::string::npos, "Qwen3: Function content preserved");
|
||||||
|
test_assert(result.find("fahrenheit_to_celsius") != std::string::npos, "Qwen3: Second function preserved");
|
||||||
|
|
||||||
|
std::cout << " ✅ Qwen3 whitespace preservation working correctly!" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
// Test the streaming tool calls fix implementation
|
// Test the streaming tool calls fix implementation
|
||||||
void test_streaming_tool_calls_fix() {
|
void test_streaming_tool_calls_fix() {
|
||||||
std::cout << "\n=== Streaming Tool Calls Fix Validation ===" << std::endl;
|
std::cout << "\n=== Streaming Tool Calls Fix Validation ===" << std::endl;
|
||||||
@@ -2797,6 +2831,7 @@ int main() {
|
|||||||
test_content_cleaning();
|
test_content_cleaning();
|
||||||
test_contamination_reproduction(); // Added this test
|
test_contamination_reproduction(); // Added this test
|
||||||
test_mixed_formats();
|
test_mixed_formats();
|
||||||
|
test_qwen3_whitespace_preservation(); // Test whitespace fix
|
||||||
|
|
||||||
std::cout << "\n🌍 Unicode & International Tests:" << std::endl;
|
std::cout << "\n🌍 Unicode & International Tests:" << std::endl;
|
||||||
test_unicode_support();
|
test_unicode_support();
|
||||||
|
|||||||
Reference in New Issue
Block a user