[feat](cmake & doc): fix CMake architecture detection bug and update SFT docs

This commit is contained in:
KMSorSMS
2025-11-04 08:46:26 +00:00
parent e40ba6dfae
commit 0c15da437f
7 changed files with 31 additions and 34 deletions

View File

@@ -11,6 +11,8 @@
- [Multi-GPU Tutorial](en/multi-gpu-tutorial.md)
- [Use FP8 GPU Kernel](en/fp8_kernel.md)
- [Use AMD GPU](en/ROCm.md)
- [SFT User Guide](en/KTransformers-Fine-Tuning_User-Guide.md)
- [SFT Developer Technical Notes](en/KTransformers-Fine-Tuning_Developer-Technical-Notes.md)
# Server
- [Server](en/api/server/server.md)
- [Website](en/api/server/website.md)

View File

@@ -108,7 +108,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Use header-only fmt to avoid needing to link libfmt (fix undefined symbol vprint)
add_compile_definitions(FMT_HEADER_ONLY)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -ffast-math")
set(CMAKE_BUILD_TYPE "Release")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -fsanitize=address -fno-omit-frame-pointer")
@@ -116,6 +115,7 @@ set(CMAKE_BUILD_TYPE "Release")
# set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
find_package(OpenMP REQUIRED)
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
include(CheckCXXCompilerFlag)
@@ -255,7 +255,7 @@ elseif(CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR
list(APPEND ARCH_FLAGS -mavx2 -mfma -msse3 -mf16c)
endif()
if(LLAMA_AVX512)
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mfma -mf16c)
list(APPEND ARCH_FLAGS -mavx512f -mavx512bw -mfma -mf16c -msse3)
endif()
if(LLAMA_AVX512_VBMI)
list(APPEND ARCH_FLAGS -mavx512vbmi)
@@ -349,7 +349,6 @@ if(HOST_IS_X86)
add_compile_definitions(HAVE_AMX=1)
list(APPEND ARCH_FLAGS -mamx-tile -mamx-bf16 -mamx-int8)
message(STATUS "AMX enabled")
list(APPEND ARCH_FLAGS -mamx-tile)
endif()
# add_executable(amx-test ${CMAKE_CURRENT_SOURCE_DIR}/operators/amx/amx-test.cpp)
# target_link_libraries(amx-test llama)
@@ -366,7 +365,7 @@ if(HOST_IS_X86)
endif()
endif()
message(STATUS "ARCH_FLAGS: ${ARCH_FLAGS}")
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")
@@ -611,6 +610,4 @@ else()
message(FATAL_ERROR "NUMA library not found, please install NUMA, sudo apt install libnuma-dev")
endif()
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
message(STATUS "ARCH_FLAGS: ${ARCH_FLAGS}")

View File

@@ -223,38 +223,36 @@ class CMakeBuild(build_ext):
# CPU feature flags mapping: if user specified CPUINFER_CPU_INSTRUCT, honor it;
# else auto-pick based on detection (x86 only)
if os.environ.get("CPUINFER_CPU_INSTRUCT"):
cmake_args += cpu_feature_flags()
else:
d = self.detect_cpu_info()
print(f"Detected CPU info: {d}")
cmake_args += cpu_feature_flags()
d = self.detect_cpu_info()
print(f"Detected CPU info: {d}")
# Vendor / feature specific toggles
# Enable AMD MoE kernel on AMD by default unless user explicitly set CPUINFER_ENABLE_AMD
if d.get("vendor") == "amd" and os.environ.get("CPUINFER_ENABLE_AMD") is None:
cmake_args.append("-DKTRANSFORMERS_CPU_MOE_AMD=ON")
print("-- Detected AMD CPU; enabling AMD MoE kernel (-DKTRANSFORMERS_CPU_MOE_AMD=ON)")
# Vendor / feature specific toggles
# Enable AMD MoE kernel on AMD by default unless user explicitly set CPUINFER_ENABLE_AMD
if d.get("vendor") == "amd" and os.environ.get("CPUINFER_ENABLE_AMD") is None:
cmake_args.append("-DKTRANSFORMERS_CPU_MOE_AMD=ON")
print("-- Detected AMD CPU; enabling AMD MoE kernel (-DKTRANSFORMERS_CPU_MOE_AMD=ON)")
# On ARM, enable KML by default if not explicitly toggled
if d.get("vendor") == "arm" and os.environ.get("CPUINFER_ENABLE_KML") is None:
cmake_args.append("-DKTRANSFORMERS_CPU_USE_KML=ON")
print("-- Detected ARM CPU; enabling KML (-DKTRANSFORMERS_CPU_USE_KML=ON)")
# On ARM, enable KML by default if not explicitly toggled
if d.get("vendor") == "arm" and os.environ.get("CPUINFER_ENABLE_KML") is None:
cmake_args.append("-DKTRANSFORMERS_CPU_USE_KML=ON")
print("-- Detected ARM CPU; enabling KML (-DKTRANSFORMERS_CPU_USE_KML=ON)")
# If AMX or AVX512 present, enable umbrella unless overridden; enable AMX specifically when present
if "AMX" in d["features"]:
if os.environ.get("CPUINFER_ENABLE_AMX") is None:
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX=ON")
print("-- AMX support detected; enabling (-DKTRANSFORMERS_CPU_USE_AMX=ON)")
if ("AMX" in d["features"] or "AVX512" in d["features"]) and os.environ.get(
"CPUINFER_ENABLE_AVX512"
) is None:
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON")
print("-- Enabling AMX/AVX512 umbrella (-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON)")
# If AMX or AVX512 present, enable umbrella unless overridden; enable AMX specifically when present
if "AMX" in d["features"]:
if os.environ.get("CPUINFER_ENABLE_AMX") is None:
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX=ON")
print("-- AMX support detected; enabling (-DKTRANSFORMERS_CPU_USE_AMX=ON)")
if ("AMX" in d["features"] or "AVX512" in d["features"]) and os.environ.get(
"CPUINFER_ENABLE_AVX512"
) is None:
cmake_args.append("-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON")
print("-- Enabling AMX/AVX512 umbrella (-DKTRANSFORMERS_CPU_USE_AMX_AVX512=ON)")
# Friendly summary
print(
f"-- CPU detection: vendor={d.get('vendor')} arch={d.get('arch')} features={sorted(list(d.get('features', [])))}"
)
# Friendly summary
print(
f"-- CPU detection: vendor={d.get('vendor')} arch={d.get('arch')} features={sorted(list(d.get('features', [])))}"
)
# Optional AMX / MLA toggles (explicit env overrides auto detection above)
if os.environ.get("CPUINFER_ENABLE_AMX"):