mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-08 08:12:34 +08:00
Restore ViT spatial order after windowed attention.
This commit is contained in:
parent
b202f842af
commit
e00ae62907
@@ -425,4 +425,7 @@ class Qwen2VLVisionTransformer(nn.Module):
|
|||||||
hidden_states = block(hidden_states, position_embeddings, cu_seqlens_now, optimized_attention=optimized_attention)
|
hidden_states = block(hidden_states, position_embeddings, cu_seqlens_now, optimized_attention=optimized_attention)
|
||||||
|
|
||||||
hidden_states = self.merger(hidden_states)
|
hidden_states = self.merger(hidden_states)
|
||||||
|
# Potentially important for spatially precise edits. This is present in the HF implementation.
|
||||||
|
reverse_indices = torch.argsort(window_index)
|
||||||
|
hidden_states = hidden_states[reverse_indices, :]
|
||||||
return hidden_states
|
return hidden_states
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user