From 88ed89303463ab030654ef87f7f54d72ee3979bb Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 23 Jul 2024 14:17:42 -0400 Subject: [PATCH] Allow SPieceTokenizer to load model from a byte string. --- comfy/text_encoders/spiece_tokenizer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/comfy/text_encoders/spiece_tokenizer.py b/comfy/text_encoders/spiece_tokenizer.py index d611d5bb..fa303da0 100644 --- a/comfy/text_encoders/spiece_tokenizer.py +++ b/comfy/text_encoders/spiece_tokenizer.py @@ -1,14 +1,18 @@ import os class SPieceTokenizer: + add_eos = True + @staticmethod def from_pretrained(path): return SPieceTokenizer(path) def __init__(self, tokenizer_path): import sentencepiece - self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path) - self.end = self.tokenizer.eos_id() + if isinstance(tokenizer_path, bytes): + self.tokenizer = sentencepiece.SentencePieceProcessor(model_proto=tokenizer_path, add_eos=self.add_eos) + else: + self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path, add_eos=self.add_eos) def get_vocab(self): out = {} @@ -18,5 +22,4 @@ class SPieceTokenizer: def __call__(self, string): out = self.tokenizer.encode(string) - out += [self.end] return {"input_ids": out}