# Mirror of https://github.com/turboderp-org/exllamav3.git
# synced 2026-03-15 00:07:24 +00:00
from setuptools import setup
|
|
import importlib.util
|
|
import os
|
|
|
|
if torch := importlib.util.find_spec("torch") is not None:
|
|
from torch.utils import cpp_extension
|
|
from torch import version as torch_version
|
|
|
|
extension_name = "exllamav3_ext"

# All build switches are driven by environment variables:
#   EXLLAMA_NOCOMPILE  - present: skip precompilation (JIT at runtime instead)
#   EXLLAMA_VERBOSE    - present: verbose build output
#   EXLLAMA_EXT_DEBUG  - present: add debug-oriented compiler flags
env = os.environ
precompile = "EXLLAMA_NOCOMPILE" not in env
verbose = "EXLLAMA_VERBOSE" in env
ext_debug = "EXLLAMA_EXT_DEBUG" in env
# Warn (but do not abort) when precompilation was requested while torch is
# absent; the build then effectively falls back to a JIT-style install.
if precompile and not torch:
    for msg in (
        "Cannot precompile unless torch is installed.",
        "To explicitly JIT install run EXLLAMA_NOCOMPILE= pip install <xyz>",
    ):
        print(msg)
# Platform detection and baseline compiler flags.  CUDA objects always get
# line info (for profilers) and -O3; host flags start empty and are filled
# in per platform below.
windows = (os.name == "nt")

extra_cflags = []
extra_cuda_cflags = ["-lineinfo", "-O3"]
# Per-platform host compiler flags: MSVC gets /Ox (plus /Zi debug symbols when
# EXLLAMA_EXT_DEBUG is set); GCC/Clang get -Ofast (plus compile-time reporting
# and device-side assertions for debug builds).
# Fix: the original appended an empty list to extra_cuda_cflags in every
# branch — pure no-ops, removed here.
if windows:
    extra_cflags += ["/Ox"]
    if ext_debug:
        extra_cflags += ["/Zi"]
else:
    extra_cflags += ["-Ofast"]
    if ext_debug:
        extra_cflags += ["-ftime-report", "-DTORCH_USE_CUDA_DSA"]
# ROCm builds (torch.version.hip is truthy) need half-precision support
# enabled in hipBLAS.  The `torch` flag short-circuits so torch_version is
# only touched when torch is actually installed.
if torch and torch_version.hip:
    extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]

# Flags as expected by torch's cpp_extension build: host compiler vs nvcc.
extra_compile_args = dict(
    cxx=extra_cflags,
    nvcc=extra_cuda_cflags,
)
library_dir = "exllamav3"
sources_dir = os.path.join(library_dir, extension_name)

# Gather every C/C++/CUDA translation unit under the extension directory,
# expressed relative to this setup script (walk order is left as-is).
_here = os.path.dirname(__file__)
sources = []
for root, _, files in os.walk(sources_dir):
    for filename in files:
        if filename.endswith((".c", ".cpp", ".cu")):
            sources.append(os.path.relpath(os.path.join(root, filename), start=_here))

print(sources)
# When precompiling (and torch is available), hand setuptools the CUDA
# extension plus torch's build_ext hook; otherwise pass nothing extra and
# let the extension be JIT-compiled at runtime.
if precompile and torch:
    setup_kwargs = {
        "ext_modules": [
            cpp_extension.CUDAExtension(
                extension_name,
                sources,
                extra_compile_args=extra_compile_args,
                # cublas must be linked explicitly on Windows only.
                libraries=["cublas"] if windows else [],
            )
        ],
        "cmdclass": {"build_ext": cpp_extension.BuildExtension},
    }
else:
    setup_kwargs = {}
# Extract __version__ from the package's version.py without importing the
# package (which may not be importable before the extension is built).
_version_ns = {}
with open("exllamav3/version.py", encoding="utf8") as fp:
    exec(fp.read(), _version_ns)
version = _version_ns["__version__"]
print("Version:", version)
# Distribution metadata.  setup_kwargs (built above) optionally contributes
# the precompiled CUDA extension and its build_ext command class.
_packages = [
    "exllamav3",
    "exllamav3.generator",
    "exllamav3.generator.sampler",
    "exllamav3.generator.filter",
    "exllamav3.conversion",
    "exllamav3.integration",
    "exllamav3.architecture",
    "exllamav3.model",
    "exllamav3.modules",
    "exllamav3.modules.quant",
    "exllamav3.modules.quant.exl3_lib",
    "exllamav3.tokenizer",
    "exllamav3.cache",
    "exllamav3.loader",
    "exllamav3.util",
]

_install_requires = [
    "torch>=2.6.0",
    "flash_attn>=2.7.4.post1",
    "tokenizers>=0.21.1",
    "numpy>=2.1.0",
    "rich",
    "typing_extensions",
    "ninja",
    "safetensors>=0.3.2",
    "pyyaml",
    "marisa_trie",
    "kbnf>=0.4.2",
    "formatron>=0.5.0",
    "pydantic==2.11.0",
]

setup(
    name="exllamav3",
    version=version,
    packages=_packages,
    url="https://github.com/turboderp/exllamav3",
    license="MIT",
    author="turboderp",
    install_requires=_install_requires,
    include_package_data=True,
    package_data={"": ["py.typed"]},
    verbose=verbose,
    **setup_kwargs,
)