Fix return_word_ids=True with Anima tokenizer (#12328)

This commit is contained in:
asagi4
2026-02-07 02:38:04 +02:00
committed by GitHub
parent eba6c940fd
commit a831c19b70

View File

@@ -23,7 +23,7 @@ class AnimaTokenizer:
def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
out = {}
qwen_ids = self.qwen3_06b.tokenize_with_weights(text, return_word_ids, **kwargs)
out["qwen3_06b"] = [[(token, 1.0) for token, _ in inner_list] for inner_list in qwen_ids] # Set weights to 1.0
out["qwen3_06b"] = [[(token, 1.0, id) if return_word_ids else (token, 1.0) for token, _, id in inner_list] for inner_list in qwen_ids] # Set weights to 1.0
out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids, **kwargs)
return out