mirror of
https://github.com/turboderp-org/exllamav3.git
synced 2026-04-20 14:29:51 +00:00
Cleanup unused imports
This commit is contained in:
@@ -3,7 +3,7 @@ import os, sys
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from exllamav3 import model_init, Generator, Job, ComboSampler
|
||||
from exllamav3.util.progress import ProgressBar
|
||||
import argparse, contextlib, subprocess
|
||||
import argparse, subprocess
|
||||
from human_eval.data import write_jsonl, read_problems
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
from __future__ import annotations
|
||||
import sys, os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from exllamav3 import model_init
|
||||
from exllamav3 import model_init, Generator, Job
|
||||
import argparse, contextlib
|
||||
import argparse
|
||||
import torch
|
||||
import util
|
||||
import random, math
|
||||
from datasets import load_dataset
|
||||
from exllamav3.util.file import disk_lru_cache, disk_lru_cache_clear
|
||||
from exllamav3.util.file import disk_lru_cache
|
||||
from exllamav3.util.progress import ProgressBar
|
||||
|
||||
@disk_lru_cache("get_dataset_mmlu")
|
||||
|
||||
@@ -2,10 +2,9 @@ import sys, os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import argparse
|
||||
from exllamav3.util.file import disk_lru_cache, disk_lru_cache_clear
|
||||
from exllamav3.util.file import disk_lru_cache
|
||||
from exllamav3.util.progress import ProgressBar
|
||||
from exllamav3.util.memory import free_mem
|
||||
from exllamav3 import Config, Model, Cache, Tokenizer, model_init
|
||||
from exllamav3 import model_init
|
||||
from datasets import load_dataset
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
@@ -5,8 +5,8 @@ import argparse
|
||||
from exllamav3.util.file import disk_lru_cache, disk_lru_cache_clear
|
||||
from exllamav3.util.progress import ProgressBar
|
||||
from exllamav3.util.memory import free_mem
|
||||
from exllamav3 import Config, Model, Cache, Tokenizer, model_init
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
from exllamav3 import Config, Tokenizer, model_init
|
||||
from transformers import AutoModelForCausalLM
|
||||
from datasets import load_dataset
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
@@ -2,20 +2,17 @@ import sys, os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import argparse
|
||||
from exllamav3.util.file import disk_lru_cache, disk_lru_cache_clear
|
||||
from exllamav3.util.file import disk_lru_cache
|
||||
from exllamav3.util.progress import ProgressBar
|
||||
from exllamav3.util.memory import free_mem
|
||||
from exllamav3 import Config, Model, Cache, Tokenizer, model_init
|
||||
from exllamav3 import Config, Model, Tokenizer
|
||||
from exllamav3.ext import exllamav3_ext as ext
|
||||
from datasets import load_dataset
|
||||
from exllamav3.modules import Linear
|
||||
from exllamav3.modules.quant import LinearFP16, LinearEXL3
|
||||
from exllamav3.modules.quant.exl3_lib.quantize import regularize
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
import numpy as np
|
||||
import termplotlib as tpl
|
||||
|
||||
# ANSI codes
|
||||
ESC = "\u001b"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import sys, os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from exllamav3 import Model, Config, Cache, Tokenizer, AsyncGenerator, AsyncJob, Sampler
|
||||
from exllamav3 import Model, Config, Cache, Tokenizer, AsyncGenerator, AsyncJob
|
||||
import asyncio
|
||||
|
||||
"""
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import sys, os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from exllamav3 import Model, Config, Cache, Tokenizer, Generator, Job, GreedySampler
|
||||
from exllamav3.util import Timer
|
||||
from common import format_prompt, get_stop_conditions
|
||||
|
||||
"""
|
||||
|
||||
@@ -3,8 +3,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import argparse
|
||||
from transformers import AutoTokenizer
|
||||
from exllamav3.util.progress import ProgressBar
|
||||
from exllamav3.util.file import disk_lru_cache, disk_lru_cache_clear
|
||||
from exllamav3 import Config, Model, Cache, Tokenizer, model_init, Generator, Job
|
||||
from exllamav3.util.file import disk_lru_cache
|
||||
from exllamav3 import model_init, Generator, Job
|
||||
from datasets import load_dataset
|
||||
import torch
|
||||
import time
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import sys, shutil
|
||||
|
||||
from rich.prompt import Prompt
|
||||
from rich.live import Live
|
||||
from rich.markdown import Markdown
|
||||
from rich.console import Console
|
||||
from prompt_toolkit import prompt as ptk_prompt
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
import re
|
||||
import sys
|
||||
import pyperclip
|
||||
import time
|
||||
import os
|
||||
|
||||
def copy_last_codeblock(text: str, num) -> str | None:
|
||||
pattern = re.compile(r"```[^\n`]*\n(.*?)```", re.DOTALL)
|
||||
|
||||
@@ -6,7 +6,6 @@ from formatron.formatter import FormatterBuilder
|
||||
from pydantic import conlist
|
||||
from typing import Literal, Optional
|
||||
from formatron.extractor import NonterminalExtractor
|
||||
import json
|
||||
|
||||
def get_superhero_filter(tokenizer) -> list[Filter]:
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import sys, os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from exllamav3 import Config, Model, Cache, Tokenizer, DefaultSampler, GreedySampler
|
||||
from exllamav3 import Config, Model, Cache, Tokenizer, GreedySampler
|
||||
from exllamav3.util import Timer
|
||||
from common import format_prompt, get_stop_conditions
|
||||
import torch
|
||||
|
||||
@@ -7,7 +7,7 @@ import torch.nn.functional as F
|
||||
from ..model.config import Config
|
||||
from ..model.model import Model
|
||||
from ..util.rope import RopeStyle
|
||||
from ..util.file import read_dict, no_value, no_default
|
||||
from ..util.file import read_dict, no_default
|
||||
from ..util.vision import convert_to_rgb, normalize_image
|
||||
from ..modules import (
|
||||
Module,
|
||||
|
||||
@@ -3,15 +3,14 @@ from typing_extensions import override
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from ..model.config import Config, no_default
|
||||
from ..model.config import Config
|
||||
from ..model.model import Model
|
||||
from ..util.rope import RopeStyle, position_embedding_grid_2d, RopeSettings
|
||||
from ..util.file import read_dict, no_value, no_default
|
||||
from ..util.file import read_dict, no_default
|
||||
from ..util.vision import convert_to_rgb, normalize_image, smart_resize
|
||||
from ..modules import (
|
||||
Module,
|
||||
RMSNorm,
|
||||
Embedding,
|
||||
TransformerBlock,
|
||||
Attention,
|
||||
GatedMLP,
|
||||
@@ -19,9 +18,7 @@ from ..modules import (
|
||||
Conv,
|
||||
LayerNorm,
|
||||
Glm4VPosEmbedding,
|
||||
MLP
|
||||
)
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from .glm4 import Glm4Model
|
||||
from types import SimpleNamespace
|
||||
from ..tokenizer import Tokenizer, MMEmbedding
|
||||
|
||||
@@ -1,31 +1,7 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from ..model.config import Config, no_default
|
||||
from ..model.model import Model
|
||||
from ..util.rope import RopeStyle, position_embedding_grid_2d, RopeSettings
|
||||
from ..util.file import read_dict, no_value, no_default
|
||||
from ..util.vision import convert_to_rgb, normalize_image, smart_resize
|
||||
from ..modules import (
|
||||
Module,
|
||||
RMSNorm,
|
||||
Embedding,
|
||||
TransformerBlock,
|
||||
Attention,
|
||||
GatedMLP,
|
||||
Linear,
|
||||
Conv,
|
||||
LayerNorm,
|
||||
Glm4VPosEmbedding,
|
||||
MLP
|
||||
)
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from ..util.rope import RopeStyle
|
||||
from .glm4_moe import Glm4MoeModel
|
||||
from types import SimpleNamespace
|
||||
from ..tokenizer import Tokenizer, MMEmbedding
|
||||
from PIL import Image
|
||||
import os, json
|
||||
from .glm4v import read_glm4v_vision_config, read_glm4v_pp_config, Glm4VVisionModel
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import torch.nn.functional as F
|
||||
from ..model.config import Config
|
||||
from ..model.model import Model
|
||||
from ..util.rope import RopeSettings, RopeStyle
|
||||
from ..util.file import read_dict, no_value, no_default
|
||||
from ..util.file import read_dict, no_default
|
||||
from ..util.vision import size_to_longest_edge_and_patch_size, convert_to_rgb, normalize_image
|
||||
from ..util.tensor import to2
|
||||
from ..modules import (
|
||||
@@ -20,7 +20,6 @@ from ..modules import (
|
||||
Linear,
|
||||
Conv,
|
||||
MLP,
|
||||
LayerNorm,
|
||||
)
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from ..tokenizer import Tokenizer, MMEmbedding
|
||||
|
||||
@@ -2,21 +2,16 @@ from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from ..model.config import Config, no_default
|
||||
from ..model.config import Config
|
||||
from ..model.model import Model
|
||||
from ..util.rope import RopeStyle, position_embedding_grid_2d, RopeSettings, RoPE
|
||||
from ..util.rope import RopeStyle, position_embedding_grid_2d, RopeSettings
|
||||
from ..util.vision import convert_to_rgb, normalize_image, smart_resize
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from ..util.file import read_dict, no_value, no_default
|
||||
from ..util.file import read_dict, no_default
|
||||
from ..modules import (
|
||||
Module,
|
||||
RMSNorm,
|
||||
Embedding,
|
||||
TransformerBlock,
|
||||
Attention,
|
||||
GatedMLP,
|
||||
Linear,
|
||||
Conv,
|
||||
MLP,
|
||||
@@ -24,7 +19,6 @@ from ..modules import (
|
||||
Qwen3VLPosEmbedding
|
||||
)
|
||||
from .qwen3 import Qwen3Model
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from ..tokenizer import Tokenizer, MMEmbedding
|
||||
from types import SimpleNamespace
|
||||
from PIL import Image
|
||||
|
||||
@@ -1,33 +1,7 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from ..model.config import Config, no_default
|
||||
from ..model.model import Model
|
||||
from ..util.rope import RopeStyle, position_embedding_grid_2d, RopeSettings, RoPE
|
||||
from ..util.vision import convert_to_rgb, normalize_image, smart_resize
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from ..util.file import read_dict, no_value, no_default
|
||||
from ..modules import (
|
||||
Module,
|
||||
RMSNorm,
|
||||
Embedding,
|
||||
TransformerBlock,
|
||||
Attention,
|
||||
GatedMLP,
|
||||
Linear,
|
||||
Conv,
|
||||
MLP,
|
||||
LayerNorm,
|
||||
Qwen3VLPosEmbedding
|
||||
)
|
||||
from ..util.rope import RopeStyle
|
||||
from .qwen3_moe import Qwen3MoeModel
|
||||
from ..modules.attn import prepare_for_attn
|
||||
from ..tokenizer import Tokenizer, MMEmbedding
|
||||
from types import SimpleNamespace
|
||||
from PIL import Image
|
||||
import os, json
|
||||
from .qwen3_vl import read_qwen3_vl_vision_config, read_qwen3_vl_pp_config, Qwen3VLVisionModel
|
||||
|
||||
|
||||
2
exllamav3/cache/cache.py
vendored
2
exllamav3/cache/cache.py
vendored
@@ -2,8 +2,6 @@ from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Type
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model import Model, Config
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
|
||||
2
exllamav3/cache/fp16.py
vendored
2
exllamav3/cache/fp16.py
vendored
@@ -1,8 +1,6 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..constants import PAGE_SIZE
|
||||
from ..model import Model, Config
|
||||
from .cache import CacheLayer
|
||||
|
||||
4
exllamav3/cache/quant.py
vendored
4
exllamav3/cache/quant.py
vendored
@@ -1,10 +1,8 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..constants import PAGE_SIZE
|
||||
from ..model import Model, Config
|
||||
from ..model import Config
|
||||
from .cache import CacheLayer
|
||||
from typing import TYPE_CHECKING
|
||||
from exllamav3.ext import exllamav3_ext as ext
|
||||
|
||||
2
exllamav3/cache/recurrent.py
vendored
2
exllamav3/cache/recurrent.py
vendored
@@ -1,7 +1,5 @@
|
||||
from collections import OrderedDict
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
|
||||
class CacheableState(ABC):
|
||||
def __init__(self):
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import math
|
||||
import bisect
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..modules.linear import Linear
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
import argparse
|
||||
import torch
|
||||
from .. import Config, Model, Tokenizer
|
||||
from ..util.progress import ProgressBar
|
||||
from .calibration_data import get_default_calibration
|
||||
from ..modules import Linear
|
||||
import json
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
from .compile import compile_model
|
||||
from ..loader.safetensors import SafetensorsCollection, VariantSafetensorsCollection
|
||||
from ..loader.safetensors import VariantSafetensorsCollection
|
||||
|
||||
col_default = "\u001b[0m"
|
||||
col_red = "\u001b[31;1m"
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
from torch.utils.cpp_extension import load
|
||||
import os, glob
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
import threading
|
||||
from .util.arch_list import maybe_set_arch_list_env
|
||||
|
||||
extension_name = "exllamav3_ext"
|
||||
|
||||
@@ -10,12 +10,10 @@ from .pagetable import PageTable
|
||||
from .job import Job
|
||||
from .filter import Filter
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from .sampler import Sampler, GumbelSampler
|
||||
from .sampler import Sampler
|
||||
from .visualizer import CacheVisualizer
|
||||
import time
|
||||
import threading
|
||||
import numpy as np
|
||||
from ..util import profile_opt
|
||||
from ..tokenizer import MMEmbedding
|
||||
from ..util import profile_opt
|
||||
|
||||
|
||||
@@ -1,21 +1,17 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from ..cache.cache import Cache
|
||||
if TYPE_CHECKING:
|
||||
from .generator import Generator
|
||||
from ..constants import PAGE_SIZE
|
||||
import numpy as np
|
||||
from .pagetable import CachePage, Sequence, tensor_hash_checksum, random_hash
|
||||
from .pagetable import Sequence, tensor_hash_checksum, random_hash
|
||||
from .filter import Filter
|
||||
import random
|
||||
from collections import deque
|
||||
import time
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from .sampler import Sampler, DefaultSampler
|
||||
from ..util.tensor import SeqTensor
|
||||
from ..util import profile_opt
|
||||
from ..tokenizer import MMEmbedding
|
||||
from functools import lru_cache
|
||||
from ..util import profile_opt
|
||||
|
||||
@@ -6,11 +6,9 @@ import torch
|
||||
import os, glob
|
||||
import numpy as np
|
||||
import json
|
||||
import mmap
|
||||
from ..util import Timer, cuda_sync_active
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from functools import cached_property
|
||||
from fnmatch import fnmatch
|
||||
import time
|
||||
|
||||
MAX_DEFERRED_LOAD_CHUNK = 2*1024**2
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
from __future__ import annotations
|
||||
from abc import ABC, abstractproperty, abstractmethod
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from abc import ABC
|
||||
import os, json
|
||||
from ..util.rope import RopeSettings, RopeStyle
|
||||
from ..loader import SafetensorsCollection
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from __future__ import annotations
|
||||
from functools import lru_cache
|
||||
from typing import Callable
|
||||
import torch
|
||||
from ..util.memory import (
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
import multiprocessing
|
||||
from multiprocessing import Process, Pipe
|
||||
import torch.distributed as dist
|
||||
from ..util import find_free_port
|
||||
from .model_tp_alloc import TPAllocator
|
||||
import os
|
||||
@@ -11,9 +9,7 @@ from ..util.memory import touch_device_measure_vram
|
||||
from ..util.progress import ProgressBar
|
||||
from .config import Config
|
||||
from ..util.misc import Cleanupper
|
||||
from .model_tp_shared import SMProducer
|
||||
from .model_tp_fn import *
|
||||
from .model_tp_backend import TPBackend, TPBackendNCCL, TPBackendNative
|
||||
import uuid
|
||||
from ..util import log_tp, global_t0
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import time
|
||||
import numpy as np
|
||||
from .model_tp_cuda import cuda_host_register, cuda_host_unregister, CUDA_HOST_REGISTER_PORTABLE
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from multiprocessing import shared_memory, Barrier
|
||||
from multiprocessing import shared_memory
|
||||
from ..util import log_tp
|
||||
|
||||
GLOBALS_SIZE = 128*1024
|
||||
|
||||
@@ -2,7 +2,6 @@ from . import Model, Config, Cache, Tokenizer
|
||||
from .loader import SafetensorsCollection, VariantSafetensorsCollection
|
||||
from .cache import CacheLayer_fp16, CacheLayer_quant
|
||||
from argparse import ArgumentParser
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
def add_args(
|
||||
|
||||
@@ -7,13 +7,11 @@ from ..util.rope import RopeSettings, RoPE
|
||||
from ..util.tensor import get_for_device, to2
|
||||
from . import Module, Linear, RMSNorm, LayerNorm
|
||||
from ..constants import PAGE_SIZE
|
||||
from ..cache import Cache
|
||||
from flash_attn import flash_attn_func, flash_attn_with_kvcache
|
||||
from ..util import profile_opt
|
||||
from .multilinear import MultiLinear
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
import torch.distributed as dist
|
||||
from ..util import profile_opt
|
||||
|
||||
"""
|
||||
SDPA:
|
||||
|
||||
@@ -2,19 +2,15 @@ from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from ..util.tensor import to2
|
||||
from . import Module, Linear
|
||||
from .multilinear import MultiLinear
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..constants import MAX_MLP_INTERMEDIATE
|
||||
from ..util import first_not_none
|
||||
from ..util import profile_opt
|
||||
from dataclasses import dataclass
|
||||
from .mlp import MLP, GatedMLP
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
import torch.distributed as dist
|
||||
from ..util import profile_opt
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
from ..ext import exllamav3_ext as ext
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from ..util.tensor import to2
|
||||
|
||||
@@ -5,12 +5,12 @@ import torch.nn.functional as F
|
||||
from ..model.config import Config
|
||||
from ..util.tensor import get_for_device, to2
|
||||
from . import Module, Linear
|
||||
from ..util import profile_opt
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
from .gated_rmsnorm import GatedRMSNorm
|
||||
from ..cache import CacheableState
|
||||
from ..util.tensor import g_tensor_cache
|
||||
from ..util import profile_opt
|
||||
|
||||
"""
|
||||
causal_conv1d wrappers and fallback functions
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
from . import Module
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
class OutputGather(Module):
|
||||
def __init__(
|
||||
|
||||
@@ -2,11 +2,8 @@ from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
from ..util.tensor import get_for_device, to2
|
||||
|
||||
class Glm4VPosEmbedding(Module):
|
||||
|
||||
@@ -5,7 +5,6 @@ import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
|
||||
class LayerNorm(Module):
|
||||
|
||||
@@ -2,16 +2,13 @@ from __future__ import annotations
|
||||
from functools import cached_property
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
from .quant import LinearFP16, LinearFP16_torch, LinearEXL3
|
||||
from .quant import LinearFP16, LinearEXL3
|
||||
from .quant.exl3_lib import quantize_exl3
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..conversion.allocation import allocate_linear
|
||||
from ..util.memory import free_mem
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@ from . import Module, Linear
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..constants import MAX_MLP_INTERMEDIATE
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
import torch.distributed as dist
|
||||
from .multilinear import MultiLinear
|
||||
from ..util.tensor import g_tensor_cache
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@ from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
import torch
|
||||
import os
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from ..model.config import Config
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..util.tensor import to2
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@ import torch
|
||||
from ...model.config import Config
|
||||
from .exl3_lib.quantize import preapply_had_l, preapply_had_r, had_k, had_n
|
||||
from ...ext import exllamav3_ext as ext
|
||||
from ...util import profile_opt
|
||||
from ...util.tensor import g_tensor_cache
|
||||
from ...util import profile_opt
|
||||
|
||||
class LinearEXL3:
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ...ext import exllamav3_ext as ext
|
||||
from ...util.tensor import to2
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
from ..ext import exllamav3_ext as ext
|
||||
from ..util.tensor import to2
|
||||
from ..model.model_tp_alloc import TPAllocation
|
||||
|
||||
|
||||
class Qwen3VLPosEmbedding(Module):
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..model.config import Config
|
||||
from . import Module
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
from __future__ import annotations
|
||||
from typing_extensions import override
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from ..util.tensor import to2
|
||||
from ..model.config import Config
|
||||
from . import Module, RMSNorm, LayerNorm, Attention, GatedDeltaNet, GatedMLP, MLP, BlockSparseMLP
|
||||
from ..conversion.allocation import allocate_transformer
|
||||
from ..util import profile_opt
|
||||
import torch.distributed as dist
|
||||
|
||||
class TransformerBlock(Module):
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import torch
|
||||
import os, json, re
|
||||
import os, re
|
||||
from tokenizers import Tokenizer as HFTokenizer, models
|
||||
from ..util import synchronized
|
||||
from ..util.file import maybe_read_json
|
||||
|
||||
@@ -2,8 +2,8 @@ import os
|
||||
import shelve
|
||||
import sys
|
||||
import json
|
||||
from typing import Any, Dict, List, TypeVar, Union, cast
|
||||
from functools import lru_cache, wraps
|
||||
from typing import Any, TypeVar, cast
|
||||
from functools import wraps
|
||||
|
||||
|
||||
def disk_lru_cache_name(filename):
|
||||
|
||||
Reference in New Issue
Block a user