Remove the Sage Attention3 switch, but retain the attention function registration.
Jianqiao Huang 2025-12-11 14:16:47 -08:00
parent 483ba1e98b
commit 7883076f5c
3 changed files with 6 additions and 18 deletions
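
Because the commit drops only the CLI switch while keeping the attention function registration, a caller could still opt into Sage Attention 3 by overriding the module-level dispatcher by hand. The sketch below is illustrative only: the module path comfy.ldm.modules.attention and the retained names attention3_sage and SAGE_ATTENTION3_IS_AVAILABLE are assumptions inferred from the hunks in this diff, not guaranteed by the commit.

# Illustrative sketch only. The import path and the retained symbols
# (attention3_sage, SAGE_ATTENTION3_IS_AVAILABLE) are assumptions based on
# the hunks below, not something this commit guarantees.
import comfy.ldm.modules.attention as attention

if getattr(attention, "SAGE_ATTENTION3_IS_AVAILABLE", False):
    # Point the global dispatcher at the retained Sage Attention 3 kernel,
    # replacing whatever the --use-* startup flags selected.
    attention.optimized_attention = attention.attention3_sage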


@@ -112,7 +112,6 @@ attn_group.add_argument("--use-split-cross-attention", action="store_true", help
 attn_group.add_argument("--use-quad-cross-attention", action="store_true", help="Use the sub-quadratic cross attention optimization . Ignored when xformers is used.")
 attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.")
 attn_group.add_argument("--use-sage-attention", action="store_true", help="Use sage attention.")
-attn_group.add_argument("--use-sage-attention3", action="store_true", help="Use sage attention 3.")
 attn_group.add_argument("--use-flash-attention", action="store_true", help="Use FlashAttention.")
 parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.")


@@ -35,12 +35,7 @@ try:
     from sageattn3 import sageattn3_blackwell
     SAGE_ATTENTION3_IS_AVAILABLE = True
 except ImportError as e:
-    if model_management.sage_attention3_enabled():
-        if e.name == "sageattn3":
-            logging.error(f"\n\nTo use the `--use-sage-attention3` feature, the `sageattn3` package must be installed first.\nPlease check https://github.com/thu-ml/SageAttention/tree/main/sageattention3_blackwell")
-        else:
-            raise e
-        exit(-1)
+    pass

 FLASH_ATTENTION_IS_AVAILABLE = False
 try:
@@ -726,9 +721,6 @@ optimized_attention = attention_basic
 if model_management.sage_attention_enabled():
     logging.info("Using sage attention")
     optimized_attention = attention_sage
-if model_management.sage_attention3_enabled():
-    logging.info("Using sage attention 3")
-    optimized_attention = attention3_sage
 elif model_management.xformers_enabled():
     logging.info("Using xformers attention")
     optimized_attention = attention_xformers


@@ -1189,9 +1189,6 @@ def unpin_memory(tensor):
 def sage_attention_enabled():
     return args.use_sage_attention

-def sage_attention3_enabled():
-    return args.use_sage_attention3
-
 def flash_attention_enabled():
     return args.use_flash_attention