mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-20 14:29:22 +00:00
[feat](cmake & doc): fix bug with cmake arch detect & update doc for sft
This commit is contained in:
@@ -11,6 +11,8 @@
|
||||
- [Multi-GPU Tutorial](en/multi-gpu-tutorial.md)
|
||||
- [Use FP8 GPU Kernel](en/fp8_kernel.md)
|
||||
- [Use AMD GPU](en/ROCm.md)
|
||||
- [SFT user guide](en/KTransformers-Fine-Tuning_User-Guide.md)
|
||||
- [SFT developer tech notes](en/KTransformers-Fine-Tuning_Developer-Technical-Notes.md)
|
||||
# Server
|
||||
- [Server](en/api/server/server.md)
|
||||
- [Website](en/api/server/website.md)
|
||||
|
||||
@@ -108,7 +108,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
# Use header-only fmt to avoid needing to link libfmt (fix undefined symbol vprint)
|
||||
add_compile_definitions(FMT_HEADER_ONLY)
|
||||
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -ffast-math")
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer")
|
||||
@@ -116,6 +115,7 @@ set(CMAKE_BUILD_TYPE "Release")
|
||||
# set(CMAKE_BUILD_TYPE "Debug")
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
find_package(OpenMP REQUIRED)
|
||||
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
@@ -255,7 +255,7 @@ elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR
|
||||
list(APPEND ARCH_FLAGS -mavx2 -mfma -msse3 -mf16c)
|
||||
endif()
|
||||
if(LLAMA_AVX512)
|
||||
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mfma -mf16c)
|
||||
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mfma -mf16c -msse3)
|
||||
endif()
|
||||
if(LLAMA_AVX512_VBMI)
|
||||
list(APPEND ARCH_FLAGS -mavx512vbmi)
|
||||
@@ -349,7 +349,6 @@ if(HOST_IS_X86)
|
||||
add_compile_definitions(HAVE_AMX=1)
|
||||
list(APPEND ARCH_FLAGS -mamx-tile -mamx-bf16 -mamx-int8)
|
||||
message(STATUS "AMX enabled")
|
||||
list(APPEND ARCH_FLAGS -mamx-tile)
|
||||
endif()
|
||||
# add_executable(amx-test ${CMAKE_CURRENT_SOURCE_DIR}/operators/amx/amx-test.cpp)
|
||||
# target_link_libraries(amx-test llama)
|
||||
@@ -366,7 +365,7 @@ if(HOST_IS_X86)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
message(STATUS "ARCH_FLAGS: ${ARCH_FLAGS}")
|
||||
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
|
||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
|
||||
@@ -611,6 +610,4 @@ else()
|
||||
message(FATAL_ERROR "NUMA library not found, please install NUMA, sudo apt install libnuma-dev")
|
||||
endif()
|
||||
|
||||
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||
message(STATUS "ARCH_FLAGS: ${ARCH_FLAGS}")
|
||||
|
||||
|
||||
@@ -223,38 +223,36 @@ class CMakeBuild(build_ext):
|
||||
|
||||
# CPU feature flags mapping: if user specified CPUINFER_CPU_INSTRUCT, honor it;
|
||||
# else auto-pick based on detection (x86 only)
|
||||
if os.environ.get("CPUINFER_CPU_INSTRUCT"):
|
||||
cmake_args += cpu_feature_flags()
|
||||
else:
|
||||
d = self.detect_cpu_info()
|
||||
print(f"Detected CPU info: {d}")
|
||||
cmake_args += cpu_feature_flags()
|
||||
d = self.detect_cpu_info()
|
||||
print(f"Detected CPU info: {d}")
|
||||
|
||||
# Vendor / feature specific toggles
|
||||
# Enable AMD MoE kernel on AMD by default unless user explicitly set CPUINFER_ENABLE_AMD
|
||||
if d.get("vendor") == "amd" and os.environ.get("CPUINFER_ENABLE_AMD") is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_MOE_AMD=ON")
|
||||
print("-- Detected AMD CPU; enabling AMD MoE kernel (-DKTRANSFORMERS_CPU_MOE_AMD=ON)")
|
||||
# Vendor / feature specific toggles
|
||||
# Enable AMD MoE kernel on AMD by default unless user explicitly set CPUINFER_ENABLE_AMD
|
||||
if d.get("vendor") == "amd" and os.environ.get("CPUINFER_ENABLE_AMD") is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_MOE_AMD=ON")
|
||||
print("-- Detected AMD CPU; enabling AMD MoE kernel (-DKTRANSFORMERS_CPU_MOE_AMD=ON)")
|
||||
|
||||
# On ARM, enable KML by default if not explicitly toggled
|
||||
if d.get("vendor") == "arm" and os.environ.get("CPUINFER_ENABLE_KML") is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_KML=ON")
|
||||
print("-- Detected ARM CPU; enabling KML (-DKTRANSFORMERS_CPU_USE_KML=ON)")
|
||||
# On ARM, enable KML by default if not explicitly toggled
|
||||
if d.get("vendor") == "arm" and os.environ.get("CPUINFER_ENABLE_KML") is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_KML=ON")
|
||||
print("-- Detected ARM CPU; enabling KML (-DKTRANSFORMERS_CPU_USE_KML=ON)")
|
||||
|
||||
# If AMX or AVX512 present, enable umbrella unless overridden; enable AMX specifically when present
|
||||
if "AMX" in d["features"]:
|
||||
if os.environ.get("CPUINFER_ENABLE_AMX") is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX=ON")
|
||||
print("-- AMX support detected; enabling (-DKTRANSFORMERS_CPU_USE_AMX=ON)")
|
||||
if ("AMX" in d["features"] or "AVX512" in d["features"]) and os.environ.get(
|
||||
"CPUINFER_ENABLE_AVX512"
|
||||
) is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON")
|
||||
print("-- Enabling AMX/AVX512 umbrella (-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON)")
|
||||
# If AMX or AVX512 present, enable umbrella unless overridden; enable AMX specifically when present
|
||||
if "AMX" in d["features"]:
|
||||
if os.environ.get("CPUINFER_ENABLE_AMX") is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX=ON")
|
||||
print("-- AMX support detected; enabling (-DKTRANSFORMERS_CPU_USE_AMX=ON)")
|
||||
if ("AMX" in d["features"] or "AVX512" in d["features"]) and os.environ.get(
|
||||
"CPUINFER_ENABLE_AVX512"
|
||||
) is None:
|
||||
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON")
|
||||
print("-- Enabling AMX/AVX512 umbrella (-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON)")
|
||||
|
||||
# Friendly summary
|
||||
print(
|
||||
f"-- CPU detection: vendor={d.get('vendor')} arch={d.get('arch')} features={sorted(list(d.get('features', [])))}"
|
||||
)
|
||||
# Friendly summary
|
||||
print(
|
||||
f"-- CPU detection: vendor={d.get('vendor')} arch={d.get('arch')} features={sorted(list(d.get('features', [])))}"
|
||||
)
|
||||
|
||||
# Optional AMX / MLA toggles (explicit env overrides auto detection above)
|
||||
if os.environ.get("CPUINFER_ENABLE_AMX"):
|
||||
|
||||
Reference in New Issue
Block a user