# Patch overview (commentary placed before the first "diff --git" header).
#
# NOTE: the line breaks, indentation and blank lines of this patch have been
# collapsed; the hunk text below is kept exactly as received.
#
# javascript/_utils.js
#   calculateUsageBias() drops its lastUseDate parameter together with the
#   days-since-last-use and alias checks; only the
#   `uses < TAC_CFG.frequencyMinCount` condition remains.
#
# javascript/tagAutocomplete.js
#   syncOptions() reads opts["tac_frequencyRecommendCap"] into
#   TAC_CFG.frequencyRecommendCap.
#   autocomplete() splits the first 2000 result names into tagNames and
#   aliasNames (alias = type tag/extra whose name does not include tagword)
#   and passes aliasNames to getUseCounts only when
#   TAC_CFG.frequencyIncludeAlias is set. lastUseDate is no longer parsed.
#
# scripts/tag_autocomplete_helper.py
#   Adds the tac_frequencyRecommendCap option (default 10) and appends
#   "Set to 0 to disable." to the tac_frequencyMaxAge info text.
#   get_use_count_list() passes tac_frequencyMaxAge to db.get_tag_counts as
#   date_limit (None when the value is not > 0) and, when the cap is > 0,
#   keeps the top entries sorted by x[2], the count field of the tuples
#   yielded by get_tag_counts.
#
# scripts/tag_frequency_db.py
#   transaction() now connects inside the try block, sets
#   conn.isolation_level = None, prints sqlite3.Error without re-raising it,
#   and closes the connection only when `conn` is truthy.
#   get_tag_counts() gains date_limit=None; when it is not None the query
#   also requires last_used > datetime('now', '-' || ? || ' days').
#
# NOTE(review): in autocomplete(), `types.push(r.type)` still runs for every
#   result in result order, while `names` is tagNames followed by aliasNames
#   (or tagNames only). get_tag_counts pairs the two lists with
#   zip(tags, ttypes), so name/type pairs look misaligned whenever an alias
#   entry exists -- presumably getUseCounts forwards both arrays unchanged;
#   confirm against its implementation.
# NOTE(review): in transaction(), `conn` is first bound inside the try. If
#   sqlite3.connect raises, the `if conn:` in the finally block reads an
#   unbound local; initialising `conn = None` before the try would avoid
#   that. In the same case the function ends without reaching `yield` --
#   presumably it is wrapped by contextlib.contextmanager (decorator not
#   visible in this hunk); confirm how callers should see a failed connect.
# NOTE(review): get_use_count_list() now calls db.get_tag_counts before
#   db_request, which only receives `lambda: count_list`. Whatever db_request
#   does around the callable (not visible here) no longer covers the lookup
#   itself -- verify this is intended.
# NOTE(review): getattr(shared.opts, ...) results are compared with `> 0` and
#   passed to min(); this assumes the stored option values are numbers --
#   TODO confirm they can never be None.
diff --git a/javascript/_utils.js b/javascript/_utils.js index a02cbc4..f44aff2 100644 --- a/javascript/_utils.js +++ b/javascript/_utils.js @@ -196,16 +196,9 @@ function flatten(obj, roots = [], sep = ".") { } // Calculate biased tag score based on post count and frequent usage -function calculateUsageBias(result, count, uses, lastUseDate) { - // Calculate days since last use - const diffTime = Math.abs(Date.now() - (lastUseDate || Date.now())); - const diffDays = Math.ceil(diffTime / (1000 * 60 * 60 * 24)); +function calculateUsageBias(result, count, uses) { // Check setting conditions - if ( - uses < TAC_CFG.frequencyMinCount || - diffDays > TAC_CFG.frequencyMaxAge || - (!TAC_CFG.frequencyIncludeAlias && result.aliases && !result.text.includes(tagword)) - ) { + if (uses < TAC_CFG.frequencyMinCount) { uses = 0; } else if (uses != 0) { result.usageBias = true; diff --git a/javascript/tagAutocomplete.js b/javascript/tagAutocomplete.js index e221831..e29a983 100644 --- a/javascript/tagAutocomplete.js +++ b/javascript/tagAutocomplete.js @@ -229,6 +229,7 @@ async function syncOptions() { frequencyFunction: opts["tac_frequencyFunction"], frequencyMinCount: opts["tac_frequencyMinCount"], frequencyMaxAge: opts["tac_frequencyMaxAge"], + frequencyRecommendCap: opts["tac_frequencyRecommendCap"], frequencyIncludeAlias: opts["tac_frequencyIncludeAlias"], useStyleVars: opts["tac_useStyleVars"], // Insertion related settings @@ -1177,12 +1178,20 @@ async function autocomplete(textArea, prompt, fixedTag = null) { // Sort again with frequency / usage count if enabled if (TAC_CFG.frequencySort) { // Split our results into a list of names and types - let names = []; + let tagNames = []; + let aliasNames = []; let types = []; // Limit to 2k for performance reasons + const aliasTypes = [ResultType.tag, ResultType.extra]; results.slice(0,2000).forEach(r => { const name = r.type === ResultType.chant ? 
r.aliases : r.text; - names.push(name); + // Add to alias list or tag list depending on if the name includes the tagword + // (the same criteria is used in the filter in calculateUsageBias) + if (aliasTypes.includes(r.type) && !name.includes(tagword)) { + aliasNames.push(name); + } else { + tagNames.push(name); + } types.push(r.type); }); @@ -1191,10 +1200,9 @@ async function autocomplete(textArea, prompt, fixedTag = null) { let isNegative = textAreaId.includes("n"); // Request use counts from the DB + const names = TAC_CFG.frequencyIncludeAlias ? tagNames.concat(aliasNames) : tagNames; const counts = await getUseCounts(names, types, isNegative); - // Sort all - // Pre-calculate weights to prevent duplicate work const resultBiasMap = new Map(); results.forEach(result => { @@ -1203,9 +1211,8 @@ async function autocomplete(textArea, prompt, fixedTag = null) { // Find matching pair from DB results const useStats = counts.find(c => c.name === name && c.type === type); const uses = useStats?.count || 0; - const lastUseDate = Date.parse(useStats?.lastUseDate); // Calculate & set weight - const weight = calculateUsageBias(result, result.count, uses, lastUseDate) + const weight = calculateUsageBias(result, result.count, uses) resultBiasMap.set(result, weight); }); // Actual sorting with the pre-calculated weights diff --git a/scripts/tag_autocomplete_helper.py b/scripts/tag_autocomplete_helper.py index 981bd5d..925f138 100644 --- a/scripts/tag_autocomplete_helper.py +++ b/scripts/tag_autocomplete_helper.py @@ -546,7 +546,8 @@ def on_ui_settings(): "tac_frequencySort": shared.OptionInfo(True, "Locally record tag usage and sort frequent tags higher").info("Will also work for extra networks, keeping the specified base order"), "tac_frequencyFunction": shared.OptionInfo("Logarithmic (weak)", "Function to use for frequency sorting", gr.Dropdown, lambda: {"choices": list(frequency_sort_functions.keys())}).info("; ".join([f'{key}: {val}' for key, val in 
frequency_sort_functions.items()])), "tac_frequencyMinCount": shared.OptionInfo(3, "Minimum number of uses for a tag to be considered frequent").info("Tags with less uses than this will not be sorted higher, even if the sorting function would normally result in a higher position."), - "tac_frequencyMaxAge": shared.OptionInfo(30, "Maximum days since last use for a tag to be considered frequent").info("Similar to the above, tags that haven't been used in this many days will not be sorted higher."), + "tac_frequencyMaxAge": shared.OptionInfo(30, "Maximum days since last use for a tag to be considered frequent").info("Similar to the above, tags that haven't been used in this many days will not be sorted higher. Set to 0 to disable."), + "tac_frequencyRecommendCap": shared.OptionInfo(10, "Maximum number of recommended tags").info("Limits the maximum number of recommended tags to not drown out normal results. Set to 0 to disable."), "tac_frequencyIncludeAlias": shared.OptionInfo(False, "Frequency sorting matches aliases for frequent tags").info("Tag frequency will be increased for the main tag even if an alias is used for completion. 
This option can be used to override the default behavior of alias results being ignored for frequency sorting."), # Insertion related settings "tac_replaceUnderscores": shared.OptionInfo(True, "Replace underscores with spaces on insertion"), @@ -783,7 +784,20 @@ def api_tac(_: gr.Blocks, app: FastAPI): # Semantically weird to use post here, but it's required for the body on js side @app.post("/tacapi/v1/get-use-count-list") async def get_use_count_list(body: UseCountListRequest): - return db_request(lambda: list(db.get_tag_counts(body.tagNames, body.tagTypes, body.neg)), get=True) + # If a date limit is set > 0, pass it to the db + date_limit = getattr(shared.opts, "tac_frequencyMaxAge", 30) + date_limit = date_limit if date_limit > 0 else None + + count_list = list(db.get_tag_counts(body.tagNames, body.tagTypes, body.neg, date_limit)) + + # If a limit is set, return at max the top n results by count + if count_list and len(count_list): + limit = int(min(getattr(shared.opts, "tac_frequencyRecommendCap", 10), len(count_list))) + # Sort by count and return the top n + if limit > 0: + count_list = sorted(count_list, key=lambda x: x[2], reverse=True)[:limit] + + return db_request(lambda: count_list, get=True) @app.put("/tacapi/v1/reset-use-count") async def reset_use_count(tagname: str, ttype: int, pos: bool, neg: bool): diff --git a/scripts/tag_frequency_db.py b/scripts/tag_frequency_db.py index 9a0cd10..5b1b195 100644 --- a/scripts/tag_frequency_db.py +++ b/scripts/tag_frequency_db.py @@ -13,15 +13,19 @@ def transaction(db=db_file): """Context manager for database transactions. Ensures that the connection is properly closed after the transaction. 
""" - conn = sqlite3.connect(db, timeout=timeout) try: + conn = sqlite3.connect(db, timeout=timeout) + conn.isolation_level = None cursor = conn.cursor() cursor.execute("BEGIN") yield cursor cursor.execute("COMMIT") + except sqlite3.Error as e: + print("Tag Autocomplete: Frequency database error:", e) finally: - conn.close() + if conn: + conn.close() class TagFrequencyDb: @@ -118,18 +122,29 @@ class TagFrequencyDb: else: return 0, None - def get_tag_counts(self, tags: list[str], ttypes: list[str], negative=False): + def get_tag_counts(self, tags: list[str], ttypes: list[str], negative=False, date_limit=None): count_str = "count_neg" if negative else "count_pos" with transaction() as cursor: for tag, ttype in zip(tags, ttypes): - cursor.execute( - f""" - SELECT {count_str}, last_used - FROM tag_frequency - WHERE name = ? AND type = ? - """, - (tag, ttype), - ) + if date_limit is not None: + cursor.execute( + f""" + SELECT {count_str}, last_used + FROM tag_frequency + WHERE name = ? AND type = ? + AND last_used > datetime('now', '-' || ? || ' days') + """, + (tag, ttype, date_limit), + ) + else: + cursor.execute( + f""" + SELECT {count_str}, last_used + FROM tag_frequency + WHERE name = ? AND type = ? + """, + (tag, ttype), + ) tag_count = cursor.fetchone() if tag_count: yield (tag, ttype, tag_count[0], tag_count[1])