@@ -14349,14 +14349,14 @@
   variants: function
   tags: nondeterministic_seeded

-- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+- func: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   dispatch:
     CPU: _scaled_dot_product_flash_attention_cpu
     CUDA: _scaled_dot_product_flash_attention_cuda
     NestedTensorCUDA: _scaled_dot_product_flash_attention_nestedtensor_cuda
   tags: nondeterministic_seeded

-- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
+- func: _scaled_dot_product_flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)
   device_check: NoCheck
   variants: function
   dispatch:
@@ -14375,13 +14375,13 @@
     CUDA: _scaled_dot_product_efficient_attention_backward_cuda
   tags: nondeterministic_seeded

-- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, int? max_q, int? max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+- func: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt? max_q, SymInt? max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
   variants: function
   dispatch:
     CUDA: _flash_attention_forward
   tags: nondeterministic_seeded

-- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, int max_q, int max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
+- func: _flash_attention_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? scale=None) -> (Tensor, Tensor, Tensor)
   device_check: NoCheck
   variants: function
   dispatch: