mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2025-02-28 22:51:45 +00:00
Slightly lower hunyuan video memory usage.
This commit is contained in:
parent
25683b5b02
commit
6320d05696
@ -5,8 +5,15 @@ from torch import Tensor
|
|||||||
from comfy.ldm.modules.attention import optimized_attention
|
from comfy.ldm.modules.attention import optimized_attention
|
||||||
import comfy.model_management
|
import comfy.model_management
|
||||||
|
|
||||||
|
|
||||||
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
|
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor:
|
||||||
q, k = apply_rope(q, k, pe)
|
q_shape = q.shape
|
||||||
|
k_shape = k.shape
|
||||||
|
|
||||||
|
q = q.float().reshape(*q.shape[:-1], -1, 1, 2)
|
||||||
|
k = k.float().reshape(*k.shape[:-1], -1, 1, 2)
|
||||||
|
q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v)
|
||||||
|
k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v)
|
||||||
|
|
||||||
heads = q.shape[1]
|
heads = q.shape[1]
|
||||||
x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask)
|
x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask)
|
||||||
|
Loading…
Reference in New Issue
Block a user