improve installation experience (#666)

This commit is contained in:
TerminalMan
2024-11-02 20:11:14 +00:00
committed by GitHub
parent 84b1f9017d
commit d92ff8d9e4
3 changed files with 93 additions and 76 deletions

View File

@@ -1,3 +1,4 @@
recursive-include exllamav2 *
global-include *.typed
global-exclude *.pyc
global-exclude dni_*

0
exllamav2/py.typed Normal file
View File

168
setup.py
View File

@@ -1,15 +1,23 @@
from setuptools import setup, Extension
from torch.utils import cpp_extension
from torch import version as torch_version
import importlib.util
import os
if torch := importlib.util.find_spec("torch") is not None:
from torch.utils import cpp_extension
from torch import version as torch_version
extension_name = "exllamav2_ext"
verbose = False
ext_debug = False
precompile = "EXLLAMA_NOCOMPILE" not in os.environ
verbose = "EXLLAMA_VERBOSE" in os.environ
ext_debug = "EXLLAMA_EXT_DEBUG" in os.environ
precompile = 'EXLLAMA_NOCOMPILE' not in os.environ
if precompile and not torch:
print(
"cannot precompile unless torch is installed \
To explicitly JIT install run EXLLAMA_NOCOMPILE= pip install <xyz>"
)
windows = (os.name == "nt")
windows = os.name == "nt"
extra_cflags = ["/Ox"] if windows else ["-O3"]
@@ -18,7 +26,7 @@ if ext_debug:
extra_cuda_cflags = ["-lineinfo", "-O3"]
if torch_version.hip:
if torch and torch_version.hip:
extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]
extra_compile_args = {
@@ -26,67 +34,72 @@ extra_compile_args = {
"nvcc": extra_cuda_cflags,
}
setup_kwargs = {
"ext_modules": [
cpp_extension.CUDAExtension(
extension_name,
[
"exllamav2/exllamav2_ext/ext_bindings.cpp",
"exllamav2/exllamav2_ext/ext_cache.cpp",
"exllamav2/exllamav2_ext/ext_gemm.cpp",
"exllamav2/exllamav2_ext/ext_hadamard.cpp",
"exllamav2/exllamav2_ext/ext_norm.cpp",
"exllamav2/exllamav2_ext/ext_qattn.cpp",
"exllamav2/exllamav2_ext/ext_qmatrix.cpp",
"exllamav2/exllamav2_ext/ext_qmlp.cpp",
"exllamav2/exllamav2_ext/ext_quant.cpp",
"exllamav2/exllamav2_ext/ext_rope.cpp",
"exllamav2/exllamav2_ext/ext_stloader.cpp",
"exllamav2/exllamav2_ext/ext_sampling.cpp",
"exllamav2/exllamav2_ext/ext_element.cpp",
"exllamav2/exllamav2_ext/ext_tp.cpp",
"exllamav2/exllamav2_ext/cuda/graph.cu",
"exllamav2/exllamav2_ext/cuda/h_add.cu",
"exllamav2/exllamav2_ext/cuda/h_gemm.cu",
"exllamav2/exllamav2_ext/cuda/lora.cu",
"exllamav2/exllamav2_ext/cuda/pack_tensor.cu",
"exllamav2/exllamav2_ext/cuda/quantize.cu",
"exllamav2/exllamav2_ext/cuda/q_matrix.cu",
"exllamav2/exllamav2_ext/cuda/q_attn.cu",
"exllamav2/exllamav2_ext/cuda/q_mlp.cu",
"exllamav2/exllamav2_ext/cuda/q_gemm.cu",
"exllamav2/exllamav2_ext/cuda/rms_norm.cu",
"exllamav2/exllamav2_ext/cuda/head_norm.cu",
"exllamav2/exllamav2_ext/cuda/layer_norm.cu",
"exllamav2/exllamav2_ext/cuda/rope.cu",
"exllamav2/exllamav2_ext/cuda/cache.cu",
"exllamav2/exllamav2_ext/cuda/util.cu",
"exllamav2/exllamav2_ext/cuda/softcap.cu",
"exllamav2/exllamav2_ext/cuda/tp.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/kernel_select.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_gptq_1.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_gptq_2.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_gptq_3.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_1a.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_1b.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_2a.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_2b.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_3a.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_3b.cu",
"exllamav2/exllamav2_ext/cpp/quantize_func.cpp",
"exllamav2/exllamav2_ext/cpp/profiling.cpp",
"exllamav2/exllamav2_ext/cpp/generator.cpp",
"exllamav2/exllamav2_ext/cpp/sampling.cpp",
"exllamav2/exllamav2_ext/cpp/sampling_avx2.cpp",
],
extra_compile_args=extra_compile_args,
libraries=["cublas"] if windows else [],
)],
"cmdclass": {"build_ext": cpp_extension.BuildExtension}
} if precompile else {}
setup_kwargs = (
{
"ext_modules": [
cpp_extension.CUDAExtension(
extension_name,
[
"exllamav2/exllamav2_ext/ext_bindings.cpp",
"exllamav2/exllamav2_ext/ext_cache.cpp",
"exllamav2/exllamav2_ext/ext_gemm.cpp",
"exllamav2/exllamav2_ext/ext_hadamard.cpp",
"exllamav2/exllamav2_ext/ext_norm.cpp",
"exllamav2/exllamav2_ext/ext_qattn.cpp",
"exllamav2/exllamav2_ext/ext_qmatrix.cpp",
"exllamav2/exllamav2_ext/ext_qmlp.cpp",
"exllamav2/exllamav2_ext/ext_quant.cpp",
"exllamav2/exllamav2_ext/ext_rope.cpp",
"exllamav2/exllamav2_ext/ext_stloader.cpp",
"exllamav2/exllamav2_ext/ext_sampling.cpp",
"exllamav2/exllamav2_ext/ext_element.cpp",
"exllamav2/exllamav2_ext/ext_tp.cpp",
"exllamav2/exllamav2_ext/cuda/graph.cu",
"exllamav2/exllamav2_ext/cuda/h_add.cu",
"exllamav2/exllamav2_ext/cuda/h_gemm.cu",
"exllamav2/exllamav2_ext/cuda/lora.cu",
"exllamav2/exllamav2_ext/cuda/pack_tensor.cu",
"exllamav2/exllamav2_ext/cuda/quantize.cu",
"exllamav2/exllamav2_ext/cuda/q_matrix.cu",
"exllamav2/exllamav2_ext/cuda/q_attn.cu",
"exllamav2/exllamav2_ext/cuda/q_mlp.cu",
"exllamav2/exllamav2_ext/cuda/q_gemm.cu",
"exllamav2/exllamav2_ext/cuda/rms_norm.cu",
"exllamav2/exllamav2_ext/cuda/head_norm.cu",
"exllamav2/exllamav2_ext/cuda/layer_norm.cu",
"exllamav2/exllamav2_ext/cuda/rope.cu",
"exllamav2/exllamav2_ext/cuda/cache.cu",
"exllamav2/exllamav2_ext/cuda/util.cu",
"exllamav2/exllamav2_ext/cuda/softcap.cu",
"exllamav2/exllamav2_ext/cuda/tp.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/kernel_select.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_gptq_1.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_gptq_2.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_gptq_3.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_1a.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_1b.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_2a.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_2b.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_3a.cu",
"exllamav2/exllamav2_ext/cuda/comp_units/unit_exl2_3b.cu",
"exllamav2/exllamav2_ext/cpp/quantize_func.cpp",
"exllamav2/exllamav2_ext/cpp/profiling.cpp",
"exllamav2/exllamav2_ext/cpp/generator.cpp",
"exllamav2/exllamav2_ext/cpp/sampling.cpp",
"exllamav2/exllamav2_ext/cpp/sampling_avx2.cpp",
],
extra_compile_args=extra_compile_args,
libraries=["cublas"] if windows else [],
)
],
"cmdclass": {"build_ext": cpp_extension.BuildExtension},
}
if precompile and torch
else {}
)
version_py = {}
with open("exllamav2/version.py", encoding = "utf8") as fp:
with open("exllamav2/version.py", encoding="utf8") as fp:
exec(fp.read(), version_py)
version = version_py["__version__"]
print("Version:", version)
@@ -94,9 +107,9 @@ print("Version:", version)
# version = "0.0.5"
setup(
name = "exllamav2",
version = version,
packages = [
name="exllamav2",
version=version,
packages=[
"exllamav2",
"exllamav2.generator",
# "exllamav2.generator.filters",
@@ -106,10 +119,10 @@ setup(
# "exllamav2.exllamav2_ext.cuda",
# "exllamav2.exllamav2_ext.cuda.quant",
],
url = "https://github.com/turboderp/exllamav2",
license = "MIT",
author = "turboderp",
install_requires = [
url="https://github.com/turboderp/exllamav2",
license="MIT",
author="turboderp",
install_requires=[
"pandas",
"ninja",
"fastparquet",
@@ -120,9 +133,12 @@ setup(
"websockets",
"regex",
"numpy",
"rich"
"rich",
],
include_package_data = True,
verbose = verbose,
include_package_data=True,
package_data={
"": ["py.typed"],
},
verbose=verbose,
**setup_kwargs,
)