From 94ec8884c3301c6fd6c41d9e71fa1da047765c88 Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Tue, 26 Sep 2023 10:27:50 +0200 Subject: [PATCH 1/7] Fix SD.Next error caused by embeddings without filenames This only ignores these embeddings, the root cause is a bug / behavioral difference in SD.Next Fixes #242 --- scripts/tag_autocomplete_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/tag_autocomplete_helper.py b/scripts/tag_autocomplete_helper.py index 0456add..3e0c7ca 100644 --- a/scripts/tag_autocomplete_helper.py +++ b/scripts/tag_autocomplete_helper.py @@ -174,14 +174,14 @@ def get_embeddings(sd_model): # Add embeddings to the correct list if (emb_a_shape == V1_SHAPE): - emb_v1 = [(Path(v.filename), k, "v1") for (k,v) in emb_type_a.items()] + emb_v1 = [(Path(v.filename), k, "v1") for (k,v) in emb_type_a.items() if v.filename is not None] elif (emb_a_shape == V2_SHAPE): - emb_v2 = [(Path(v.filename), k, "v2") for (k,v) in emb_type_a.items()] + emb_v2 = [(Path(v.filename), k, "v2") for (k,v) in emb_type_a.items() if v.filename is not None] if (emb_b_shape == V1_SHAPE): - emb_v1 = [(Path(v.filename), k, "v1") for (k,v) in emb_type_b.items()] + emb_v1 = [(Path(v.filename), k, "v1") for (k,v) in emb_type_b.items() if v.filename is not None] elif (emb_b_shape == V2_SHAPE): - emb_v2 = [(Path(v.filename), k, "v2") for (k,v) in emb_type_b.items()] + emb_v2 = [(Path(v.filename), k, "v2") for (k,v) in emb_type_b.items() if v.filename is not None] # Get shape of current model #vec = sd_model.cond_stage_model.encode_embedding_init_text(",", 1) From 998514bebb2f03d8aac9bbaf09eb5bf46fa37067 Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Tue, 26 Sep 2023 14:14:20 +0200 Subject: [PATCH 2/7] Proper support for SDXL embeddings Now in their own category, other embeddings don't get mislabeled anymore if an XL model is loaded --- javascript/ext_embeddings.js | 5 +++- scripts/tag_autocomplete_helper.py | 42 ++++++++++-------------------- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/javascript/ext_embeddings.js b/javascript/ext_embeddings.js index 9c7bd44..820aae4 100644 --- a/javascript/ext_embeddings.js +++ b/javascript/ext_embeddings.js @@ -11,12 +11,15 @@ class EmbeddingParser extends BaseTagParser { if (searchTerm.startsWith("v1") || searchTerm.startsWith("v2")) { versionString = searchTerm.slice(0, 2); searchTerm = searchTerm.slice(2); + } else if (searchTerm.startsWith("vxl")) { + versionString = searchTerm.slice(0, 3); + searchTerm = searchTerm.slice(3); } let filterCondition = x => x[0].toLowerCase().includes(searchTerm) || x[0].toLowerCase().replaceAll(" ", "_").includes(searchTerm); if (versionString) - tempResults = embeddings.filter(x => filterCondition(x) && x[2] && x[2] === versionString); // Filter by tagword + tempResults = embeddings.filter(x => filterCondition(x) && x[2] && x[2].toLowerCase() === versionString.toLowerCase()); // Filter by tagword else tempResults = embeddings.filter(x => filterCondition(x)); // Filter by tagword } else { diff --git a/scripts/tag_autocomplete_helper.py b/scripts/tag_autocomplete_helper.py index 3e0c7ca..eca09f3 100644 --- a/scripts/tag_autocomplete_helper.py +++ b/scripts/tag_autocomplete_helper.py @@ -156,44 +156,30 @@ def get_embeddings(sd_model): # Version constants V1_SHAPE = 768 V2_SHAPE = 1024 + VXL_SHAPE = 2048 emb_v1 = [] emb_v2 = [] + emb_vXL = [] results = [] try: # Get embedding dict from sd_hijack to separate v1/v2 embeddings - emb_type_a = 
sd_hijack.model_hijack.embedding_db.word_embeddings - emb_type_b = sd_hijack.model_hijack.embedding_db.skipped_embeddings - # Get the shape of the first item in the dict - emb_a_shape = -1 - emb_b_shape = -1 - if (len(emb_type_a) > 0): - emb_a_shape = next(iter(emb_type_a.items()))[1].shape - if (len(emb_type_b) > 0): - emb_b_shape = next(iter(emb_type_b.items()))[1].shape + loaded = sd_hijack.model_hijack.embedding_db.word_embeddings + skipped = sd_hijack.model_hijack.embedding_db.skipped_embeddings # Add embeddings to the correct list - if (emb_a_shape == V1_SHAPE): - emb_v1 = [(Path(v.filename), k, "v1") for (k,v) in emb_type_a.items() if v.filename is not None] - elif (emb_a_shape == V2_SHAPE): - emb_v2 = [(Path(v.filename), k, "v2") for (k,v) in emb_type_a.items() if v.filename is not None] + for key, emb in (loaded | skipped).items(): + if (emb.filename is None): + continue - if (emb_b_shape == V1_SHAPE): - emb_v1 = [(Path(v.filename), k, "v1") for (k,v) in emb_type_b.items() if v.filename is not None] - elif (emb_b_shape == V2_SHAPE): - emb_v2 = [(Path(v.filename), k, "v2") for (k,v) in emb_type_b.items() if v.filename is not None] + if emb.shape == V1_SHAPE: + emb_v1.append((Path(emb.filename), key, "v1")) + elif emb.shape == V2_SHAPE: + emb_v2.append((Path(emb.filename), key, "v2")) + elif emb.shape == VXL_SHAPE: + emb_vXL.append((Path(emb.filename), key, "vXL")) - # Get shape of current model - #vec = sd_model.cond_stage_model.encode_embedding_init_text(",", 1) - #model_shape = vec.shape[1] - # Show relevant entries at the top - #if (model_shape == V1_SHAPE): - # results = [e + ",v1" for e in emb_v1] + [e + ",v2" for e in emb_v2] - #elif (model_shape == V2_SHAPE): - # results = [e + ",v2" for e in emb_v2] + [e + ",v1" for e in emb_v1] - #else: - # raise AttributeError # Fallback to old method - results = sort_models(emb_v1) + sort_models(emb_v2) + results = sort_models(emb_v1) + sort_models(emb_v2) + sort_models(emb_vXL) except AttributeError: print("tag_autocomplete_helper: Old webui version or unrecognized model shape, using fallback for embedding completion.") # Get a list of all embeddings in the folder From b3e71e840d84e17a3cb54197e771ebb0f853cbbd Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Tue, 26 Sep 2023 15:12:29 +0200 Subject: [PATCH 3/7] Safety check for missing shape --- scripts/tag_autocomplete_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tag_autocomplete_helper.py b/scripts/tag_autocomplete_helper.py index eca09f3..12e2c66 100644 --- a/scripts/tag_autocomplete_helper.py +++ b/scripts/tag_autocomplete_helper.py @@ -169,7 +169,7 @@ def get_embeddings(sd_model): # Add embeddings to the correct list for key, emb in (loaded | skipped).items(): - if (emb.filename is None): + if emb.filename is None or emb.shape is None: continue if emb.shape == V1_SHAPE: From 446ac14e7fa9098b1dbc0ca53f4be6ef063b38fa Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Sun, 1 Oct 2023 23:47:02 +0200 Subject: [PATCH 4/7] Fix umi list not resetting after deleting chars behind "[" --- javascript/ext_umi.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/javascript/ext_umi.js b/javascript/ext_umi.js index a55f80c..ea5067a 100644 --- a/javascript/ext_umi.js +++ b/javascript/ext_umi.js @@ -129,7 +129,7 @@ class UmiParser extends BaseTagParser { return; } - let umiTagword = diff[0] || ''; + let umiTagword = tagCountChange < 0 ? 
'' : diff[0] || ''; let tempResults = []; if (umiTagword && umiTagword.length > 0) { umiTagword = umiTagword.toLowerCase().replace(/[\n\r]/g, ""); @@ -188,7 +188,7 @@ class UmiParser extends BaseTagParser { } } -function updateUmiTags( tagType, sanitizedText, newPrompt, textArea) { +function updateUmiTags(tagType, sanitizedText, newPrompt, textArea) { // If it was a umi wildcard, also update the umiPreviousTags if (tagType === ResultType.umiWildcard && originalTagword.length > 0) { let umiSubPrompts = [...newPrompt.matchAll(UMI_PROMPT_REGEX)]; From 5fd48f53de5b81d2bb8f10ee0fb88d9acaae2208 Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Fri, 6 Oct 2023 14:44:03 +0200 Subject: [PATCH 5/7] Fix csv parsing for unclosed quotes Fixes #245 --- javascript/_utils.js | 14 +++++++------- javascript/ext_modelKeyword.js | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/javascript/_utils.js b/javascript/_utils.js index de745f1..19de9ad 100644 --- a/javascript/_utils.js +++ b/javascript/_utils.js @@ -2,12 +2,12 @@ // Parse the CSV file into a 2D array. Doesn't use regex, so it is very lightweight. function parseCSV(str) { - var arr = []; - var quote = false; // 'true' means we're inside a quoted field + const arr = []; + let quote = false; // 'true' means we're inside a quoted field // Iterate over each character, keep track of current row and column (of the returned array) - for (var row = 0, col = 0, c = 0; c < str.length; c++) { - var cc = str[c], nc = str[c + 1]; // Current character, next character + for (let row = 0, col = 0, c = 0; c < str.length; c++) { + let cc = str[c], nc = str[c+1]; // Current character, next character arr[row] = arr[row] || []; // Create a new row if necessary arr[row][col] = arr[row][col] || ''; // Create a new column (start with empty string) if necessary @@ -24,12 +24,12 @@ function parseCSV(str) { // If it's a newline (CRLF) and we're not in a quoted field, skip the next character // and move on to the next row and move to column 0 of that new row - if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; } + if (cc == '\r' && nc == '\n') { ++row; col = 0; ++c; quote = false; continue; } // If it's a newline (LF or CR) and we're not in a quoted field, // move on to the next row and move to column 0 of that new row - if (cc == '\n' && !quote) { ++row; col = 0; continue; } - if (cc == '\r' && !quote) { ++row; col = 0; continue; } + if (cc == '\n') { ++row; col = 0; quote = false; continue; } + if (cc == '\r') { ++row; col = 0; quote = false; continue; } // Otherwise, append the current character to the current column arr[row][col] += cc; diff --git a/javascript/ext_modelKeyword.js b/javascript/ext_modelKeyword.js index ff07910..ac88747 100644 --- a/javascript/ext_modelKeyword.js +++ b/javascript/ext_modelKeyword.js @@ -20,7 +20,7 @@ async function load() { // Add to the dict csv_lines.forEach(parts => { const hash = parts[0]; - const keywords = parts[1].replaceAll("| ", ", ").replaceAll("|", ", ").trim(); + const keywords = parts[1]?.replaceAll("| ", ", ")?.replaceAll("|", ", ")?.trim(); const lastSepIndex = parts[2]?.lastIndexOf("/") + 1 || parts[2]?.lastIndexOf("\\") + 1 || 0; const name = parts[2]?.substring(lastSepIndex).trim() || "none" From 44c5450b28a8c66bbbbbc91674e7c244b10f8a7d Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Fri, 6 Oct 2023 14:54:29 +0200 Subject: [PATCH 6/7] Fix special characters breaking wiki link urls --- javascript/_utils.js | 7 +++---- javascript/tagAutocomplete.js | 2 ++ 2 files changed, 5 insertions(+), 
4 deletions(-) diff --git a/javascript/_utils.js b/javascript/_utils.js index 19de9ad..9a93104 100644 --- a/javascript/_utils.js +++ b/javascript/_utils.js @@ -1,6 +1,7 @@ // Utility functions for tag autocomplete // Parse the CSV file into a 2D array. Doesn't use regex, so it is very lightweight. +// We are ignoring newlines in quote fields since we expect one-line entries and parsing would break for unclosed quotes otherwise function parseCSV(str) { const arr = []; let quote = false; // 'true' means we're inside a quoted field @@ -22,12 +23,10 @@ function parseCSV(str) { // If it's a comma and we're not in a quoted field, move on to the next column if (cc == ',' && !quote) { ++col; continue; } - // If it's a newline (CRLF) and we're not in a quoted field, skip the next character - // and move on to the next row and move to column 0 of that new row + // If it's a newline (CRLF), skip the next character and move on to the next row and move to column 0 of that new row if (cc == '\r' && nc == '\n') { ++row; col = 0; ++c; quote = false; continue; } - // If it's a newline (LF or CR) and we're not in a quoted field, - // move on to the next row and move to column 0 of that new row + // If it's a newline (LF or CR) move on to the next row and move to column 0 of that new row if (cc == '\n') { ++row; col = 0; quote = false; continue; } if (cc == '\r') { ++row; col = 0; quote = false; continue; } diff --git a/javascript/tagAutocomplete.js b/javascript/tagAutocomplete.js index 87b09b5..fe6d759 100644 --- a/javascript/tagAutocomplete.js +++ b/javascript/tagAutocomplete.js @@ -680,6 +680,8 @@ function addResultsToList(textArea, results, tagword, resetList) { linkPart = linkPart.split("[")[0] } + linkPart = encodeURIComponent(linkPart); + // Set link based on selected file let tagFileNameLower = tagFileName.toLowerCase(); if (tagFileNameLower.startsWith("danbooru")) { From 5ebe22ddfcb1c0b1cd9f609b8b4d072be373a199 Mon Sep 17 00:00:00 2001 From: DominikDoom Date: Fri, 6 Oct 2023 16:46:18 +0200 Subject: [PATCH 7/7] Add sha256 (V2) keyword lookup As discussed in #245 --- javascript/tagAutocomplete.js | 8 ++++++++ scripts/tag_autocomplete_helper.py | 15 +++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/javascript/tagAutocomplete.js b/javascript/tagAutocomplete.js index fe6d759..f1755f6 100644 --- a/javascript/tagAutocomplete.js +++ b/javascript/tagAutocomplete.js @@ -512,6 +512,14 @@ async function insertTextAtCursor(textArea, result, tagword, tabCompletedWithout let nameDict = modelKeywordDict.get(result.hash); let names = [result.text + ".safetensors", result.text + ".pt", result.text + ".ckpt"]; + // No match, try to find a sha256 match from the cache file + if (!nameDict) { + const sha256 = await fetchAPI(`/tacapi/v1/lora-cached-hash/${result.text}`) + if (sha256) { + nameDict = modelKeywordDict.get(sha256); + } + } + if (nameDict) { let found = false; names.forEach(name => { diff --git a/scripts/tag_autocomplete_helper.py b/scripts/tag_autocomplete_helper.py index 12e2c66..9641d5a 100644 --- a/scripts/tag_autocomplete_helper.py +++ b/scripts/tag_autocomplete_helper.py @@ -1,7 +1,6 @@ # This helper script scans folders for wildcards and embeddings and writes them # to a temporary file to expose it to the javascript side -import os import glob import json import urllib.parse @@ -11,7 +10,7 @@ import gradio as gr import yaml from fastapi import FastAPI from fastapi.responses import FileResponse, JSONResponse -from modules import script_callbacks, sd_hijack, shared +from 
modules import script_callbacks, sd_hijack, shared, hashes from scripts.model_keyword_support import (get_lora_simple_hash, load_hash_cache, update_hash_cache, @@ -515,6 +514,18 @@ def api_tac(_: gr.Blocks, app: FastAPI): async def get_lyco_info(lyco_name): return await get_json_info(LYCO_PATH, lyco_name) + @app.get("/tacapi/v1/lora-cached-hash/{lora_name}") + async def get_lora_cached_hash(lora_name: str): + path_glob = glob.glob(LORA_PATH.as_posix() + f"/**/{lora_name}.*", recursive=True) + paths = [lora for lora in path_glob if Path(lora).suffix in [".safetensors", ".ckpt", ".pt"]] + if paths is not None and len(paths) > 0: + path = paths[0] + hash = hashes.sha256_from_cache(path, f"lora/{lora_name}", path.endswith(".safetensors")) + if hash is not None: + return hash + + return None + def get_path_for_type(type): if type == "lora": return LORA_PATH
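Patches 1-3 boil the embedding handling down to a single shape-based categorization over the merged word_embeddings and skipped_embeddings dicts. A minimal standalone sketch of that logic follows, assuming embedding objects that expose filename and shape the way the webui's embedding database entries do; the categorize_embeddings helper itself is hypothetical and only mirrors the loop added in patch 2 plus the guards from patches 1 and 3.

from pathlib import Path

# Shape constants from tag_autocomplete_helper.py: the embedding vector width
# identifies which model family an embedding was trained for.
V1_SHAPE = 768
V2_SHAPE = 1024
VXL_SHAPE = 2048


def categorize_embeddings(embeddings):
    """Bucket embeddings into v1 / v2 / XL lists by their vector shape.

    embeddings: dict mapping name -> object with .filename and .shape,
    mirroring word_embeddings | skipped_embeddings from sd_hijack.
    Entries without a filename (patch 1) or shape (patch 3) are skipped.
    """
    emb_v1, emb_v2, emb_vXL = [], [], []
    for key, emb in embeddings.items():
        if emb.filename is None or emb.shape is None:
            continue
        if emb.shape == V1_SHAPE:
            emb_v1.append((Path(emb.filename), key, "v1"))
        elif emb.shape == V2_SHAPE:
            emb_v2.append((Path(emb.filename), key, "v2"))
        elif emb.shape == VXL_SHAPE:
            emb_vXL.append((Path(emb.filename), key, "vXL"))
    return emb_v1, emb_v2, emb_vXL

Shapes that match none of the constants simply fall through, so embeddings for unrecognized model types are left out of completion instead of being mislabeled, and the JS side can keep filtering on the v1 / v2 / vXL tag it receives.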
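Patch 5 makes parseCSV treat every newline as the end of a row and reset the quote flag, so a field with an unclosed quote can no longer swallow the rest of the file. The real implementation stays in javascript/_utils.js; the following Python port is only an illustration of the resulting behavior.

def parse_csv(text):
    """Illustrative Python port of parseCSV after patch 5 (not the shipped code).

    Quotes toggle quoting and "" escapes a literal quote, but any newline
    (LF, CR, or CRLF) closes the current row and clears the quote flag.
    """
    rows, row, field, quote = [], [], "", False
    i = 0
    while i < len(text):
        c = text[i]
        nxt = text[i + 1] if i + 1 < len(text) else ""
        if c == '"' and quote and nxt == '"':   # escaped quote inside a field
            field += '"'; i += 2; continue
        if c == '"':                            # quote toggle
            quote = not quote; i += 1; continue
        if c == ',' and not quote:              # column separator
            row.append(field); field = ""; i += 1; continue
        if c == '\r' and nxt == '\n':           # CRLF: new row, quote reset
            row.append(field); rows.append(row)
            row, field, quote = [], "", False
            i += 2; continue
        if c in "\r\n":                         # LF or CR: new row, quote reset
            row.append(field); rows.append(row)
            row, field, quote = [], "", False
            i += 1; continue
        field += c; i += 1                      # ordinary character
    if field or row:                            # flush the last row, if any
        row.append(field)
        rows.append(row)
    return rows


# An unclosed quote is now contained to its own line:
# parse_csv('hash1,"keyword\nhash2,other') == [['hash1', 'keyword'], ['hash2', 'other']]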
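Patch 7 adds a /tacapi/v1/lora-cached-hash/{lora_name} endpoint that returns the sha256 the webui has cached for a Lora, which the JS side queries as a fallback when the short model-keyword hash has no match. A small usage sketch from Python follows; the base URL and the example Lora name are placeholders for whatever your local setup uses.

from typing import Optional

import requests  # third-party HTTP client, assumed to be available

BASE_URL = "http://127.0.0.1:7860"  # placeholder: address of the running webui


def get_cached_lora_hash(lora_name: str) -> Optional[str]:
    """Return the cached sha256 for a Lora file name, or None if unknown."""
    resp = requests.get(f"{BASE_URL}/tacapi/v1/lora-cached-hash/{lora_name}")
    resp.raise_for_status()
    return resp.json()  # FastAPI serializes the returned string (or None) as JSON


if __name__ == "__main__":
    print(get_cached_lora_hash("my_lora"))  # "my_lora" is a hypothetical file name

Because the endpoint only consults the existing hash cache via hashes.sha256_from_cache, a Lora the webui has not hashed yet comes back as None rather than being hashed on demand.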