#!/usr/bin/env bash
#
# Installer for kt-kernel.
#
# Optionally installs system build prerequisites, then selects CPU-specific
# build options (auto-detected or user-supplied) and runs `pip install`.
#
# Subcommands : deps | build | all (default)
# Build flags : --manual, --no-clean, -h/--help
# Environment : CPUINFER_CPU_INSTRUCT, CPUINFER_ENABLE_AMX,
#               CPUINFER_ENABLE_AVX512_VNNI, CPUINFER_ENABLE_AVX512_BF16,
#               CPUINFER_BUILD_TYPE, CPUINFER_PARALLEL, CPUINFER_VERBOSE
set -euo pipefail

#######################################
# Print help and terminate the script.
# NOTE(review): the original help text (a here-document) was lost when this
# file was flattened; the text below is rebuilt from the options this script
# actually parses. The non-zero exit status is assumed. Restore the original
# wording from version control if it is available.
# Outputs: help text on stdout
# Returns: never returns; exits with status 1
#######################################
usage() {
  printf 'Usage: %s [deps|build|all] [--manual] [--no-clean] [-h|--help]\n' "$0"
  printf '%s\n' \
    "" \
    "Subcommands (default: all):" \
    "  deps    Install system dependencies only" \
    "  build   Build and install kt-kernel only" \
    "  all     Install dependencies, then build" \
    "" \
    "Build options:" \
    "  --manual     Skip CPU auto-detection; requires CPUINFER_CPU_INSTRUCT" \
    "               (NATIVE|FANCY|AVX512|AVX2) and CPUINFER_ENABLE_AMX (ON|OFF)" \
    "  --no-clean   Keep the existing build/ directory" \
    "  -h, --help   Show this help" \
    "" \
    "Optional environment variables:" \
    "  CPUINFER_ENABLE_AVX512_VNNI  ON|OFF (auto-detected when unset, auto mode)" \
    "  CPUINFER_ENABLE_AVX512_BF16  ON|OFF (auto-detected when unset, auto mode)" \
    "  CPUINFER_BUILD_TYPE          default: Release" \
    "  CPUINFER_PARALLEL            default: 8" \
    "  CPUINFER_VERBOSE             default: 1"
  exit 1
}

#######################################
# Install cmake (through conda, when present) plus the hwloc development
# package and pkg-config using the distribution's package manager.
# NOTE(review): the first lines of this function (name, SUDO initialisation,
# root check) were lost with the usage text and are reconstructed from the
# surviving `SUDO="sudo" ... fi fi` fragment and from the dispatcher's calls.
# Globals: SUDO (written) - "sudo" or empty
# Returns: 0 when the OS cannot be detected (installation is skipped);
#          otherwise the status of the last package-manager command
#######################################
install_dependencies() {
  # SUDO stays a plain string because it is expanded unquoted below: an empty
  # value must vanish from the command line instead of becoming an empty word.
  SUDO=""
  if ((EUID != 0)); then
    if command -v sudo >/dev/null 2>&1; then
      SUDO="sudo"
    else
      echo "Warning: Not running as root and sudo not found. Package installation may fail."
      echo "Please run as root or install sudo."
    fi
  fi

  if command -v conda >/dev/null 2>&1; then
    echo "Installing cmake via conda..."
    conda install -y cmake
  else
    echo "Warning: conda not found. Skipping cmake installation via conda."
    echo "Please install conda or manually install cmake."
  fi

  # Detect OS type
  local os_id
  if [[ -f /etc/os-release ]]; then
    # shellcheck disable=SC1091
    . /etc/os-release
    # ID may be absent from a malformed os-release; do not trip `set -u`.
    os_id=${ID:-}
  elif [[ -f /etc/debian_version ]]; then
    os_id="debian"
  elif [[ -f /etc/redhat-release ]]; then
    os_id="rhel"
  else
    echo "Warning: Unable to detect OS type. Skipping dependency installation."
    return 0
  fi

  # Install dependencies based on OS
  # shellcheck disable=SC2086  # $SUDO is intentionally unquoted (may be empty)
  case "$os_id" in
    debian | ubuntu | linuxmint | pop)
      echo "Detected Debian-based system. Installing libhwloc-dev and pkg-config..."
      $SUDO apt update
      $SUDO apt install -y libhwloc-dev pkg-config
      ;;
    fedora | rhel | centos | rocky | almalinux)
      echo "Detected Red Hat-based system. Installing hwloc-devel and pkgconfig..."
      # Fall back to yum when the dnf invocation fails.
      $SUDO dnf install -y hwloc-devel pkgconfig \
        || $SUDO yum install -y hwloc-devel pkgconfig
      ;;
    arch | manjaro)
      echo "Detected Arch-based system. Installing hwloc and pkgconf..."
      $SUDO pacman -S --noconfirm hwloc pkgconf
      ;;
    opensuse* | sles)
      echo "Detected openSUSE-based system. Installing hwloc-devel and pkg-config..."
      $SUDO zypper install -y hwloc-devel pkg-config
      ;;
    *)
      echo "Warning: Unsupported OS '$os_id'. Please manually install libhwloc-dev and pkg-config."
      ;;
  esac
}

#######################################
# Detect CPU features from the first "flags" line of /proc/cpuinfo.
# Outputs: "has_amx has_avx512_vnni has_avx512_bf16" on stdout
#          (space-separated 0/1 values). All zeros when /proc/cpuinfo is
#          missing (e.g. macOS) or contains no "flags" line.
#######################################
detect_cpu_features() {
  local has_amx=0
  local has_avx512_vnni=0
  local has_avx512_bf16=0

  if [[ -f /proc/cpuinfo ]]; then
    local flags_line
    # grep exits non-zero when no "flags" line exists; that must not abort
    # the script under `set -e`.
    flags_line=$(grep -m1 '^flags' /proc/cpuinfo || true)

    # Substring matching, as the grep -E alternations did before.
    if [[ "$flags_line" == *amx_tile* || "$flags_line" == *amx_int8* \
      || "$flags_line" == *amx_bf16* ]]; then
      has_amx=1
    fi
    if [[ "$flags_line" == *avx512_vnni* || "$flags_line" == *avx512vnni* ]]; then
      has_avx512_vnni=1
    fi
    if [[ "$flags_line" == *avx512_bf16* || "$flags_line" == *avx512bf16* ]]; then
      has_avx512_bf16=1
    fi
  fi
  # Without /proc/cpuinfo (e.g. Darwin) every feature stays 0.

  echo "$has_amx $has_avx512_vnni $has_avx512_bf16"
}

#######################################
# Configure the build environment and run `pip install . -v`.
# Arguments: [--manual] [--no-clean] [-h|--help]; parsing stops at the first
#            unrecognised argument, which is ignored.
# Globals:   exports CPUINFER_CPU_INSTRUCT, CPUINFER_ENABLE_AMX,
#            CPUINFER_ENABLE_AVX512_VNNI and CPUINFER_ENABLE_AVX512_BF16 in
#            auto mode; always exports CPUINFER_BUILD_TYPE, CPUINFER_PARALLEL
#            and CPUINFER_VERBOSE (defaults: Release, 8, 1).
# Returns:   exits 1 on invalid manual configuration or when the user
#            declines the risky-configuration prompt.
#######################################
build_step() {
  # Parse build-only flags from arguments to this function
  local manual_mode=0
  local clean_build=1
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --manual)
        manual_mode=1
        shift
        ;;
      --no-clean)
        clean_build=0
        shift
        ;;
      -h | --help) usage ;;
      *) break ;;
    esac
  done

  # Clean local build directory to ensure a fresh CMake/configure
  local repo_root
  repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  if [[ "$clean_build" -eq 1 ]]; then
    if [[ -d "$repo_root/build" ]]; then
      echo "Cleaning previous build directory: $repo_root/build"
      # :? refuses to expand an empty root, so this can never hit "/build".
      rm -rf -- "${repo_root:?}/build"
    fi
  else
    echo "Skipping clean of $repo_root/build (requested by --no-clean)"
  fi

  local cpu_features has_amx has_avx512_vnni has_avx512_bf16 remaining

  if [[ "$manual_mode" == "0" ]]; then
    # Auto-detection mode
    echo "=========================================="
    echo "Auto-detecting CPU capabilities..."
    echo "=========================================="
    echo ""

    # detect_cpu_features returns "has_amx has_avx512_vnni has_avx512_bf16"
    cpu_features=$(detect_cpu_features)
    has_amx=${cpu_features%% *}
    remaining=${cpu_features#* }
    has_avx512_vnni=${remaining%% *}
    has_avx512_bf16=${remaining#* }

    export CPUINFER_CPU_INSTRUCT=NATIVE

    if [[ "$has_amx" == "1" ]]; then
      echo "✓ AMX instructions detected"
      export CPUINFER_ENABLE_AMX=ON
      echo ""
      echo "Configuration: NATIVE + AMX=ON"
      echo " ✓ Best performance on this machine"
      echo " ✗ Binary requires Sapphire Rapids or newer CPU"
    else
      echo "ℹ AMX instructions not detected"
      export CPUINFER_ENABLE_AMX=OFF
      echo ""
      echo "Configuration: NATIVE + AMX=OFF"
      echo " ✓ Using AVX512/AVX2 instructions"
    fi

    echo ""
    echo " ⚠️ IMPORTANT: This binary is optimized for THIS CPU only"
    echo " To build portable binaries for distribution, use:"
    echo " export CPUINFER_CPU_INSTRUCT=AVX512 # or AVX2"
    echo " export CPUINFER_ENABLE_AMX=OFF"
    echo " ./install.sh build --manual"

    # Fine-grained AVX512 subset detection (with fallback support)
    echo ""
    echo "AVX512 Feature Detection:"

    # VNNI: a value already present in the environment wins over detection
    if [[ -n "${CPUINFER_ENABLE_AVX512_VNNI:-}" ]]; then
      echo " VNNI: User override = $CPUINFER_ENABLE_AVX512_VNNI"
    elif [[ "$has_avx512_vnni" == "1" ]]; then
      echo " VNNI: ✓ Detected (hardware acceleration enabled)"
      export CPUINFER_ENABLE_AVX512_VNNI=ON
    else
      echo " VNNI: ✗ Not detected (will use software fallback, 2-3x slower)"
      export CPUINFER_ENABLE_AVX512_VNNI=OFF
    fi

    # BF16: a value already present in the environment wins over detection
    if [[ -n "${CPUINFER_ENABLE_AVX512_BF16:-}" ]]; then
      echo " BF16: User override = $CPUINFER_ENABLE_AVX512_BF16"
    elif [[ "$has_avx512_bf16" == "1" ]]; then
      echo " BF16: ✓ Detected (hardware acceleration enabled)"
      export CPUINFER_ENABLE_AVX512_BF16=ON
    else
      echo " BF16: ✗ Not detected (will use software fallback, 5-10x slower)"
      export CPUINFER_ENABLE_AVX512_BF16=OFF
    fi

    echo ""
    echo " Note: Software fallbacks ensure all code works on older CPUs"
    echo " Tip: Override with CPUINFER_ENABLE_AVX512_VNNI/BF16=ON/OFF"
    echo ""
    echo "To use manual configuration instead, run: $0 build --manual"
    echo ""
  else
    # Manual mode - validate user configuration (no exports)
    # ${VAR:-} keeps `set -u` from aborting before the message is printed.
    if [[ -z "${CPUINFER_CPU_INSTRUCT:-}" || -z "${CPUINFER_ENABLE_AMX:-}" ]]; then
      echo "Error: Manual mode requires CPUINFER_CPU_INSTRUCT and CPUINFER_ENABLE_AMX to be set."
      echo ""
      usage
    fi

    # Validate CPUINFER_CPU_INSTRUCT
    case "$CPUINFER_CPU_INSTRUCT" in
      NATIVE | FANCY | AVX512 | AVX2) ;;
      *)
        echo "Error: Invalid CPUINFER_CPU_INSTRUCT='$CPUINFER_CPU_INSTRUCT'"
        echo "Must be one of: NATIVE, FANCY, AVX512, AVX2"
        exit 1
        ;;
    esac

    # Validate CPUINFER_ENABLE_AMX
    case "$CPUINFER_ENABLE_AMX" in
      ON | OFF) ;;
      *)
        echo "Error: Invalid CPUINFER_ENABLE_AMX='$CPUINFER_ENABLE_AMX'"
        echo "Must be either: ON or OFF"
        exit 1
        ;;
    esac

    # Warn about problematic configuration
    if [[ "$CPUINFER_CPU_INSTRUCT" == "NATIVE" && "$CPUINFER_ENABLE_AMX" == "OFF" ]]; then
      cpu_features=$(detect_cpu_features)
      has_amx=${cpu_features%% *}
      if [[ "$has_amx" == "1" ]]; then
        printf '%s\n' \
          "==========================================" \
          "⚠️ WARNING: Risky Configuration" \
          "==========================================" \
          "" \
          "Your configuration:" \
          " CPUINFER_CPU_INSTRUCT = NATIVE" \
          " CPUINFER_ENABLE_AMX = OFF" \
          "" \
          "Your CPU HAS AMX support!" \
          "" \
          "Problem:" \
          " • NATIVE uses -march=native which auto-enables ALL CPU features" \
          " • This may IGNORE your AMX=OFF setting" \
          " • The binary may still contain AMX instructions" \
          "" \
          "Recommended fixes:" \
          " 1) For portable build (recommended for distribution):" \
          " export CPUINFER_CPU_INSTRUCT=AVX512" \
          "" \
          " 2) If you want best performance on this CPU:" \
          " export CPUINFER_ENABLE_AMX=ON" \
          ""
        local reply=""
        # read fails at EOF (non-interactive stdin); treat that as "no"
        # rather than letting `set -e` terminate without explanation.
        read -r -n 1 -p "Continue with risky configuration? (y/N) " reply || true
        echo
        if [[ ! "$reply" =~ ^[Yy]$ ]]; then
          exit 1
        fi
      fi
    fi
  fi

  # Set defaults for optional variables
  export CPUINFER_BUILD_TYPE=${CPUINFER_BUILD_TYPE:-Release}
  export CPUINFER_PARALLEL=${CPUINFER_PARALLEL:-8}
  export CPUINFER_VERBOSE=${CPUINFER_VERBOSE:-1}

  echo "=========================================="
  echo "Building kt-kernel with configuration:"
  echo "=========================================="
  echo " CPUINFER_CPU_INSTRUCT = $CPUINFER_CPU_INSTRUCT"
  echo " CPUINFER_ENABLE_AMX = $CPUINFER_ENABLE_AMX"
  echo " CPUINFER_ENABLE_AVX512_VNNI = ${CPUINFER_ENABLE_AVX512_VNNI:-AUTO}"
  echo " CPUINFER_ENABLE_AVX512_BF16 = ${CPUINFER_ENABLE_AVX512_BF16:-AUTO}"
  echo " CPUINFER_BUILD_TYPE = $CPUINFER_BUILD_TYPE"
  echo " CPUINFER_PARALLEL = $CPUINFER_PARALLEL"
  echo ""

  # NOTE(review): this installs from the current working directory, while the
  # clean step above targets the script's own directory; run the script from
  # the repository root so both refer to the same tree.
  pip install . -v
}

#######################################
# Subcommand dispatcher: default to "all".
# Arguments: [deps|build|all] followed by build flags. A first argument that
#            is not a subcommand selects "build" and is passed through
#            (backward compatibility: flags-only => build).
#######################################
main() {
  local subcmd="all"
  if [[ $# -gt 0 ]]; then
    case "$1" in
      deps | build | all)
        subcmd="$1"
        shift
        ;;
      -h | --help) usage ;;
      *) subcmd="build" ;;
    esac
  fi

  case "$subcmd" in
    deps)
      install_dependencies
      ;;
    build)
      build_step "$@"
      ;;
    all)
      install_dependencies
      build_step "$@"
      ;;
  esac
}

main "$@"