From 65042f7d395e92d9cc10dc66b94f63f5e40a697d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 4 Mar 2025 09:26:05 -0500 Subject: [PATCH] Make it easier to set a custom template for hunyuan video. --- comfy/sd.py | 4 ++-- comfy/sd1_clip.py | 4 ++-- comfy/sdxl_clip.py | 2 +- comfy/text_encoders/flux.py | 2 +- comfy/text_encoders/hunyuan_video.py | 7 +++++-- comfy/text_encoders/hydit.py | 2 +- comfy/text_encoders/sd3_clip.py | 2 +- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index 21913cf3..b866c66c 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -134,8 +134,8 @@ class CLIP: def clip_layer(self, layer_idx): self.layer_idx = layer_idx - def tokenize(self, text, return_word_ids=False): - return self.tokenizer.tokenize_with_weights(text, return_word_ids) + def tokenize(self, text, return_word_ids=False, **kwargs): + return self.tokenizer.tokenize_with_weights(text, return_word_ids, **kwargs) def add_hooks_to_dict(self, pooled_dict: dict[str]): if self.apply_hooks_to_conds: diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index d2457731..692ae051 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -482,7 +482,7 @@ class SDTokenizer: return (embed, leftover) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): ''' Takes a prompt and converts it to a list of (token, weight, word id) elements. Tokens can both be integer tokens and pre computed CLIP tensors. @@ -596,7 +596,7 @@ class SD1Tokenizer: tokenizer = tokenizer_data.get("{}_tokenizer_class".format(self.clip), tokenizer) setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data)) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids) return out diff --git a/comfy/sdxl_clip.py b/comfy/sdxl_clip.py index 4d0a4e8e..5b7c8a41 100644 --- a/comfy/sdxl_clip.py +++ b/comfy/sdxl_clip.py @@ -26,7 +26,7 @@ class SDXLTokenizer: self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory) self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py index b945b1aa..a12995ec 100644 --- a/comfy/text_encoders/flux.py +++ b/comfy/text_encoders/flux.py @@ -18,7 +18,7 @@ class FluxTokenizer: self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory) self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids) diff --git a/comfy/text_encoders/hunyuan_video.py b/comfy/text_encoders/hunyuan_video.py index 7149d687..bdee0b3d 100644 --- a/comfy/text_encoders/hunyuan_video.py +++ b/comfy/text_encoders/hunyuan_video.py @@ -41,11 +41,14 @@ class HunyuanVideoTokenizer: self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n""" # 95 tokens self.llama = LLAMA3Tokenizer(embedding_directory=embedding_directory, min_length=1) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, llama_template=None, **kwargs): out = {} out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) - llama_text = "{}{}".format(self.llama_template, text) + if llama_template is None: + llama_text = "{}{}".format(self.llama_template, text) + else: + llama_text = "{}{}".format(llama_template, text) out["llama"] = self.llama.tokenize_with_weights(llama_text, return_word_ids) return out diff --git a/comfy/text_encoders/hydit.py b/comfy/text_encoders/hydit.py index 7cb790f4..7da3e9fc 100644 --- a/comfy/text_encoders/hydit.py +++ b/comfy/text_encoders/hydit.py @@ -37,7 +37,7 @@ class HyditTokenizer: self.hydit_clip = HyditBertTokenizer(embedding_directory=embedding_directory) self.mt5xl = MT5XLTokenizer(tokenizer_data={"spiece_model": mt5_tokenizer_data}, embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["hydit_clip"] = self.hydit_clip.tokenize_with_weights(text, return_word_ids) out["mt5xl"] = self.mt5xl.tokenize_with_weights(text, return_word_ids) diff --git a/comfy/text_encoders/sd3_clip.py b/comfy/text_encoders/sd3_clip.py index 00d7e31a..3ad2ed93 100644 --- a/comfy/text_encoders/sd3_clip.py +++ b/comfy/text_encoders/sd3_clip.py @@ -43,7 +43,7 @@ class SD3Tokenizer: self.clip_g = sdxl_clip.SDXLClipGTokenizer(embedding_directory=embedding_directory) self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory) - def tokenize_with_weights(self, text:str, return_word_ids=False): + def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids) out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)