Merge mainline llama.cpp (#3)

* Merging mainline - WIP

* Merging mainline - WIP

  AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower, as is so often the case with llama.cpp/ggml after some "improvements" have been made.

* Merging mainline - fix Metal

* Remove check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-04-28 18:32:04 +00:00 · 2024-07-27 07:55:01 +02:00
parent 0684c3e9c7
commit 154e0d75fc
612 changed files with 50817 additions and 165936 deletions
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -123,13 +123,13 @@ builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall()

 try:
    repo = git.Repo(".", search_parent_directories=True)
-except git.exc.InvalidGitRepositoryError:
+except git.InvalidGitRepositoryError:
    repo = None


-def find_parent_in_data(commit):
+def find_parent_in_data(commit: git.Commit):
    """Helper function to find the most recent parent measured in number of commits for which there is data."""
-    heap = [(0, commit)]
+    heap: list[tuple[int, git.Commit]] = [(0, commit)]
    seen_hexsha8 = set()
    while heap:
        depth, current_commit = heapq.heappop(heap)
@@ -144,7 +144,7 @@ def find_parent_in_data(commit):
    return None


-def get_all_parent_hexsha8s(commit):
+def get_all_parent_hexsha8s(commit: git.Commit):
    """Helper function to recursively get hexsha8 values for all parents of a commit."""
    unvisited = [commit]
    visited   = []