/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION &
* AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

namespace tensorrt_llm
{
namespace kernels
{
// clang-format off




#ifndef EXCLUDE_SM_90
// Symbols named cubin_fmha_v2_<dtype>_<A>_<B>_ldgsts_sm90_cu_cubin.
//   <dtype> : fp16, bf16 or fp16_fp32
//   <A>_<B> : every combination of A in {64, 128, 256, 384, 512} and B in {32, 64}
// These are declarations only: `extern` arrays of unknown bound. No storage is
// defined in this header; the definitions must be supplied by another
// translation unit. The whole group is compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): the names suggest embedded cubin images of SM90 fused
// multi-head-attention kernels, with <A> a sequence length and <B> a head size.
// This is inferred from naming only; confirm against the generator.
extern unsigned char cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin[];
// fp16 flash-attention symbols carrying the `_tma_ws_sm90` suffix.
// Name layout:
//   cubin_fmha_v2_flash_attention_fp16_64_<N>_S_<layout>_<D>[_alibi][_qk_tanh]_tma_ws_sm90_cu_cubin
//   <layout> : qkv, q_kv or q_paged_kv (q_kv entries have no `_alibi` counterpart in this list)
//   <D>      : 32 .. 256; <N> is 256 for D <= 64, 128 for D in 80..128, 64 for D >= 160
//   _qk_tanh : only present for D = 128 and D = 256
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): presumably <layout> is the Q/K/V input packing, <D> the head size and
// `tma_ws` a TMA warp-specialized kernel flavour. This is inferred from naming only;
// confirm against the generator.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
// bf16 flash-attention symbols carrying the `_tma_ws_sm90` suffix.
// Name layout:
//   cubin_fmha_v2_flash_attention_bf16_64_<N>_S_<layout>_<D>[_alibi][_qk_tanh]_tma_ws_sm90_cu_cubin
//   <layout> : qkv, q_kv or q_paged_kv (q_kv entries have no `_alibi` counterpart in this list)
//   <D>      : 32 .. 256; <N> is 256 for D <= 64, 128 for D in 80..128, 64 for D >= 160
//   _qk_tanh : only present for D = 128 and D = 256
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): the meaning of the individual name fields is inferred from naming
// only; confirm against the generator before relying on it.
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
// e4m3 flash-attention symbols carrying the `_tma_ws_sm90` suffix.
// Name layout:
//   cubin_fmha_v2_flash_attention_e4m3_64_<N>_S_<layout>_<D>[_alibi][_qk_tanh]_tma_ws_sm90_cu_cubin
//   <layout> : qkv, q_kv or q_paged_kv (q_kv entries have no `_alibi` counterpart in this list)
//   <D>      : 32 .. 256; unlike the fp16/bf16 `_tma_ws` lists, <N> here is 256 for
//              D <= 128 and 128 for D >= 160
//   _qk_tanh : only present for D = 128 and D = 256
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): `e4m3` presumably denotes the FP8 E4M3 element format. This is
// inferred from naming only; confirm against the generator.
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
// fp16_fp32 flash-attention symbols carrying the `_tma_ws_sm90` suffix.
// Name layout:
//   cubin_fmha_v2_flash_attention_fp16_fp32_64_<N>_S_<layout>_<D>[_alibi][_qk_tanh]_tma_ws_sm90_cu_cubin
//   <layout> : qkv, q_kv or q_paged_kv (q_kv entries have no `_alibi` counterpart in this list)
//   <D>      : 32 .. 256; <N> is 256 for D <= 64, 128 for D in 80..128, 64 for D >= 160
//   _qk_tanh : only present for D = 128 and D = 256
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): `fp16_fp32` presumably means fp16 data with fp32 accumulation.
// This is inferred from naming only; confirm against the generator.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin[];
// fp16 flash-attention symbols for sm90 WITHOUT the `_tma_ws` tag.
// Name layout:
//   cubin_fmha_v2_flash_attention_fp16_<M>_<N>_S_qkv_<D>[_qk_tanh]_sm90_cu_cubin
// Only the `qkv` layout appears here. Every <D> (16 .. 256) is declared twice, with
// two different <M>_<N> pairs:
//   128_128 (D <= 64) or 64_128 (D >= 80), and
//   64_64 (D <= 32), 64_32 (D in 40..128) or 64_16 (D >= 160).
// `_qk_tanh` variants exist for D = 128 and D = 256 under both pairings.
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): <M>_<N> presumably encode tile sizes. This is inferred from naming
// only; confirm against the generator.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin[];
// bf16 flash-attention symbols for sm90 WITHOUT the `_tma_ws` tag.
// Name layout:
//   cubin_fmha_v2_flash_attention_bf16_<M>_<N>_S_qkv_<D>[_qk_tanh]_sm90_cu_cubin
// Only the `qkv` layout appears here. Every <D> (16 .. 256) is declared twice, with
// two different <M>_<N> pairs:
//   128_128 (D <= 64) or 64_128 (D >= 80), and
//   64_64 (D <= 32), 64_32 (D in 40..128) or 64_16 (D >= 160).
// `_qk_tanh` variants exist for D = 128 and D = 256 under both pairings.
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): the meaning of <M>_<N> is inferred from naming only; confirm
// against the generator.
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin[];
// fp16_fp32 flash-attention symbols for sm90 WITHOUT the `_tma_ws` tag.
// Name layout:
//   cubin_fmha_v2_flash_attention_fp16_fp32_<M>_<N>_S_qkv_<D>[_qk_tanh]_sm90_cu_cubin
// Only the `qkv` layout appears here. Every <D> (16 .. 256) is declared twice, with
// two different <M>_<N> pairs:
//   128_128 (D <= 64) or 64_128 (D >= 80), and
//   64_64 (D <= 32), 64_32 (D in 40..128) or 64_16 (D >= 160).
// `_qk_tanh` variants exist for D = 128 and D = 256 under both pairings.
// Declarations only (`extern`, unknown bound); the arrays are defined in another
// translation unit. Compiled out when EXCLUDE_SM_90 is defined.
// NOTE(review): `fp16_fp32` presumably means fp16 data with fp32 accumulation.
// This is inferred from naming only; confirm against the generator.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin[];
#endif

#ifndef EXCLUDE_SM_89
// Everything from here to the matching #endif is compiled only when
// EXCLUDE_SM_89 is not defined.
//
// e4m3_fp32 / sm89: declarations of byte arrays that are defined elsewhere
// (the array bound is left unspecified in these declarations).
// Each size is declared in a `qkv` and a `q_paged_kv` form; only the 32, 64
// and 128 entries additionally have a `q_kv` form.
// NOTE(review): going by the identifiers, each array presumably holds one
// precompiled flash-attention CUBIN image; the `128_128` / `64_32` prefix is
// presumably a tile shape, `qkv` / `q_kv` / `q_paged_kv` the input layout and
// the trailing number the head size — confirm against the generator of this list.
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin[];
// fp16 / sm89: declarations of byte arrays that are defined elsewhere.
// The list is ordered as `qkv`, then `q_kv`, then `q_paged_kv` entries, each
// run ending with its `qk_tanh`-suffixed entries.
// NOTE(review): presumably one embedded flash-attention CUBIN image per array,
// with the two leading numbers a tile shape and the trailing number the head
// size — confirm against the generator of this list.

// fp16 / sm89, `qkv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin[];
// fp16 / sm89, `q_kv` entries (only 32, 64 and 128 are declared in this form).
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin[];
// fp16 / sm89, `q_paged_kv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin[];
// bf16 / sm89: declarations of byte arrays that are defined elsewhere.
// The list is ordered as `qkv`, then `q_kv`, then `q_paged_kv` entries, each
// run ending with its `qk_tanh`-suffixed entries.
// NOTE(review): presumably one embedded flash-attention CUBIN image per array,
// with the two leading numbers a tile shape and the trailing number the head
// size — confirm against the generator of this list.

// bf16 / sm89, `qkv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin[];
// bf16 / sm89, `q_kv` entries (only 32, 64 and 128 are declared in this form).
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin[];
// bf16 / sm89, `q_paged_kv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin[];
// fp16_fp32 / sm89: declarations of byte arrays that are defined elsewhere.
// The list is ordered as `qkv`, then `q_kv`, then `q_paged_kv` entries, each
// run ending with its `qk_tanh`-suffixed entries.
// NOTE(review): presumably one embedded flash-attention CUBIN image per array;
// `fp16_fp32` presumably means fp16 data with fp32 accumulation, the two
// leading numbers a tile shape and the trailing number the head size — confirm
// against the generator of this list.

// fp16_fp32 / sm89, `qkv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin[];
// fp16_fp32 / sm89, `q_kv` entries (only 32, 64 and 128 are declared in this form).
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin[];
// fp16_fp32 / sm89, `q_paged_kv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin[];
// Closes `#ifndef EXCLUDE_SM_89`.
#endif

#ifndef EXCLUDE_SM_80
// The declarations below are compiled only when EXCLUDE_SM_80 is not defined.
//
// fp16 / sm80: declarations of byte arrays that are defined elsewhere.
// The list is ordered as `qkv`, then `q_kv`, then `q_paged_kv` entries, each
// run ending with its `qk_tanh`-suffixed entries.
// NOTE(review): presumably one embedded flash-attention CUBIN image per array,
// with the two leading numbers a tile shape and the trailing number the head
// size — confirm against the generator of this list.

// fp16 / sm80, `qkv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin[];
// fp16 / sm80, `q_kv` entries (only 32, 64 and 128 are declared in this form).
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin[];
// fp16 / sm80, `q_paged_kv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin[];
// bf16 / sm80: declarations of byte arrays that are defined elsewhere.
// The list is ordered as `qkv`, then `q_kv`, then `q_paged_kv` entries, each
// run ending with its `qk_tanh`-suffixed entries.
// NOTE(review): presumably one embedded flash-attention CUBIN image per array,
// with the two leading numbers a tile shape and the trailing number the head
// size — confirm against the generator of this list.

// bf16 / sm80, `qkv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin[];
// bf16 / sm80, `q_kv` entries (only 32, 64 and 128 are declared in this form).
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin[];
// bf16 / sm80, `q_paged_kv` entries.
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin[];
#endif

#ifndef EXCLUDE_SM_86
// Compiled only when EXCLUDE_SM_86 is not defined.
// Declarations only: every `extern unsigned char ...[]` below names a byte array of
// unspecified bound whose definition lives in another translation unit.
// All symbols in this guard carry the `_sm86_` tag and are grouped by name prefix
// (fp16, bf16, fp16_fp32) and then by the qkv / q_kv / q_paged_kv tag.
// NOTE(review): the numeric name fields and the qk_tanh tag presumably encode tile
// sizes, head size and a kernel variant -- confirm against whatever produces this list.

// sm86 / fp16 / qkv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin[];
// sm86 / fp16 / q_kv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin[];
// sm86 / fp16 / q_paged_kv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin[];
// sm86 / bf16 / qkv
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin[];
// sm86 / bf16 / q_kv
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin[];
// sm86 / bf16 / q_paged_kv
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin[];
// sm86 / fp16_fp32 / qkv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin[];
// sm86 / fp16_fp32 / q_kv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin[];
// sm86 / fp16_fp32 / q_paged_kv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin[];
#endif // EXCLUDE_SM_86

#ifndef EXCLUDE_SM_70
// Compiled only when EXCLUDE_SM_70 is not defined.
// Declarations only: every `extern unsigned char ...[]` below names a byte array of
// unspecified bound whose definition lives in another translation unit.
// All symbols in this guard carry the `_sm70_` tag and the fp16 prefix; no bf16 or
// fp16_fp32 names are declared here.
// NOTE(review): the numeric name fields and the qk_tanh tag presumably encode tile
// sizes, head size and a kernel variant -- confirm against whatever produces this list.

// sm70 / fp16 / qkv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_80_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_96_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_104_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_128_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_128_qk_tanh_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm70_cu_cubin[];
// sm70 / fp16 / q_kv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_kv_128_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_kv_128_qk_tanh_sm70_cu_cubin[];
// sm70 / fp16 / q_paged_kv
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_80_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_96_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_104_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_128_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_128_qk_tanh_sm70_cu_cubin[];
extern unsigned char cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm70_cu_cubin[];
#endif // EXCLUDE_SM_70


#ifndef EXCLUDE_SM_90
extern uint32_t cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len;
// Length symbols for the sm90 fp16 "flash_attention ... tma_ws" cubins.
// Each name is a cubin symbol stem with a `_len` suffix; presumably it holds the
// byte size of the identically named `unsigned char ..._cu_cubin[]` array --
// TODO confirm against the generated definitions.
//
// S_qkv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_kv variants (32, 64, 128, and 128 with qk_tanh).
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_qkv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// Length symbols for the sm90 bf16 "flash_attention ... tma_ws" cubins.
// Each name is a cubin symbol stem with a `_len` suffix; presumably it holds the
// byte size of the identically named `unsigned char ..._cu_cubin[]` array --
// TODO confirm against the generated definitions.
//
// S_qkv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_kv variants (32, 64, 128, and 128 with qk_tanh).
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_qkv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// Length symbols for the sm90 e4m3 "flash_attention ... tma_ws" cubins.
// Each name is a cubin symbol stem with a `_len` suffix; presumably it holds the
// byte size of the identically named `unsigned char ..._cu_cubin[]` array --
// TODO confirm against the generated definitions.
// Note: unlike the fp16/bf16 tma_ws lists, the names here use 64_256 up to 128
// and 64_128 from 160 upward.
//
// S_qkv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_kv variants (32, 64, 128, and 128 with qk_tanh).
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_qkv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// Length symbols for the sm90 fp16_fp32 "flash_attention ... tma_ws" cubins.
// Each name is a cubin symbol stem with a `_len` suffix; presumably it holds the
// byte size of the identically named `unsigned char ..._cu_cubin[]` array --
// TODO confirm against the generated definitions.
//
// S_qkv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_kv variants (32, 64, 128, and 128 with qk_tanh).
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants; the last two add the qk_tanh suffix.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_qkv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// S_q_paged_kv variants with the alibi suffix; the last two also add qk_tanh.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len;
// Length symbols for the sm90 fp16 flash_attention S_qkv cubins whose names
// carry no tma_ws suffix. Each name is a cubin symbol stem with a `_len`
// suffix; presumably it holds the byte size of the identically named cubin
// array -- TODO confirm against the generated definitions.
// Every value from 16 to 256 appears twice, under two different numeric
// prefixes (e.g. 128_128 and 64_64 for 16).
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin_len;
// qk_tanh-suffixed counterparts of the 128 and 256 entries above.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len;
// Length symbols for the sm90 bf16 flash_attention S_qkv cubins whose names
// carry no tma_ws suffix. Each name is a cubin symbol stem with a `_len`
// suffix; presumably it holds the byte size of the identically named cubin
// array -- TODO confirm against the generated definitions.
// Every value from 16 to 256 appears twice, under two different numeric
// prefixes (e.g. 128_128 and 64_64 for 16).
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin_len;
// qk_tanh-suffixed counterparts of the 128 and 256 entries above.
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len;
// Length symbols for the sm90 fp16_fp32 flash_attention S_qkv cubins whose
// names carry no tma_ws suffix. Each name is a cubin symbol stem with a `_len`
// suffix; presumably it holds the byte size of the identically named cubin
// array -- TODO confirm against the generated definitions.
// Every value from 16 to 256 appears twice, under two different numeric
// prefixes (e.g. 128_128 and 64_64 for 16).
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin_len;
// qk_tanh-suffixed counterparts of the 128 and 256 entries above.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len;
#endif

#ifndef EXCLUDE_SM_89
// Length symbols for the sm89 e4m3_fp32 flash_attention cubins; these
// declarations are compiled only when EXCLUDE_SM_89 is not defined.
// Each name is a cubin symbol stem with a `_len` suffix; presumably it holds the
// byte size of the identically named cubin array -- TODO confirm against the
// generated definitions.
// Entries are grouped by the trailing number (32 ... 256): S_qkv and
// S_q_paged_kv exist for every value, S_q_kv only for 32, 64 and 128.
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin_len;
// Length symbols for the sm89 fp16 flash_attention S_qkv cubins; these
// declarations are compiled only when EXCLUDE_SM_89 is not defined.
// Each name is a cubin symbol stem with a `_len` suffix; presumably it holds the
// byte size of the identically named cubin array -- TODO confirm against the
// generated definitions.
// Every value from 16 to 256 appears twice, under two different numeric
// prefixes (e.g. 128_128 and 64_64 for 16).
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin_len;
// qk_tanh-suffixed counterparts of the 128 and 256 entries above.
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len;
#endif

// Length symbols for the flash-attention FMHA v2 cubins whose names carry the `sm80` tag.
// Every entry is a declaration only (`extern`); the definitions are not in this header.
// The whole group is compiled out when EXCLUDE_SM_80 is defined.
// NOTE(review): presumably each symbol holds the size of a same-named `*_cu_cubin[]` array
// (this header declares such arrays with the same naming scheme) — confirm against the generator.
// NOTE(review): `uint32_t` is used unqualified and no <cstdint> include is visible at the top of
// this header, so the including file apparently has to provide it — confirm.
#ifndef EXCLUDE_SM_80
// --- `fp16` names, `S_qkv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len;
// --- `fp16` names, `S_q_kv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len;
// --- `fp16` names, `S_q_paged_kv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len;
// --- `bf16` names, `S_qkv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len;
// --- `bf16` names, `S_q_kv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len;
// --- `bf16` names, `S_q_paged_kv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len;
// --- `fp16_fp32` names, `S_qkv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len;
// --- `fp16_fp32` names, `S_q_kv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len;
// --- `fp16_fp32` names, `S_q_paged_kv` variants ---
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len;
#endif // EXCLUDE_SM_80

// Length symbols for the SM86 flash-attention cubins.
//
// Every line in this group only *declares* a `uint32_t` named `<cubin>_len`; the
// objects themselves are defined in other translation units. The SM90 rows of
// sMhaKernelMetaInfosV2 further down pass the matching `_len` symbol in the
// `mCubinSize` slot; presumably the SM86 rows do the same with these symbols.
// NOTE(review): each value is presumably the byte size of the same-named
// `..._cu_cubin[]` array - confirm against the cubin generator.
//
// Defining EXCLUDE_SM_86 removes the whole group from the build.
//
// NOTE(review): the symbol names appear to follow
//   <dtype>_<q step>_<kv step>_S_<input layout>_<head size>[_qk_tanh]_sm86
// with <dtype> in {fp16, bf16, fp16_fp32} and <input layout> in
// {qkv, q_kv, q_paged_kv} - TODO confirm; this is inferred from the names only.
#ifndef EXCLUDE_SM_86
// ---- names tagged `fp16`, `S_qkv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `fp16`, `S_q_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `fp16`, `S_q_paged_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `bf16`, `S_qkv` ----
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `bf16`, `S_q_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `bf16`, `S_q_paged_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `fp16_fp32`, `S_qkv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `fp16_fp32`, `S_q_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin_len;
// ---- names tagged `fp16_fp32`, `S_q_paged_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len;
#endif // EXCLUDE_SM_86

// Length symbols for the SM70 flash-attention cubins.
//
// Declarations only (`extern`): each `<cubin>_len` is a `uint32_t` defined in
// another translation unit. Only names tagged `fp16` appear in this group; there
// are no `bf16` or `fp16_fp32` entries here.
// NOTE(review): each value is presumably the byte size of the same-named
// `..._cu_cubin[]` array, consumed through `mCubinSize` in
// sMhaKernelMetaInfosV2 - confirm against the SM70 rows of that table.
//
// Defining EXCLUDE_SM_70 removes the whole group from the build.
#ifndef EXCLUDE_SM_70
// ---- names tagged `fp16`, `S_qkv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_80_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_96_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_104_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_128_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_128_qk_tanh_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm70_cu_cubin_len;
// ---- names tagged `fp16`, `S_q_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_kv_128_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_kv_128_qk_tanh_sm70_cu_cubin_len;
// ---- names tagged `fp16`, `S_q_paged_kv` ----
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_80_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_96_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_104_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_128_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_128_qk_tanh_sm70_cu_cubin_len;
extern uint32_t cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm70_cu_cubin_len;
#endif // EXCLUDE_SM_70


static const struct FusedMultiHeadAttentionKernelMetaInfoV2
{
    Data_type mDataType;
    unsigned int mS;
    unsigned int mStepQ;
    unsigned int mStepKV;
    unsigned int mD;
    unsigned int mSM;
    const unsigned char* mCubin;
    unsigned int mCubinSize;
    const char* mFuncName;
    unsigned int mSharedMemBytes;
    unsigned int mThreadsPerCTA;
    unsigned int mUnrollStep;
    int mAttentionMaskType;
    int mAttentionInputLayout;
    bool mInterleaved;
    bool mFlashAttention;
    bool mWarpSpecialization;
    bool mFP32Accumulation;
    bool mAlibiSupported;
    bool mTiled;
    bool mEnableQKTanhScale;
} sMhaKernelMetaInfosV2[] = {
#ifndef EXCLUDE_SM_90
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_32_ldgsts_sm90_kernel", 17408, 128, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_32_sliding_window_causal_ldgsts_sm90_kernel", 17408, 128, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_32_causal_ldgsts_sm90_kernel", 17408, 128, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_32_ldgsts_sm90_kernel_nl", 17408, 128, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_32_causal_ldgsts_sm90_kernel_nl", 17408, 128, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 17408, 128, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_32_ldgsts_sm90_kernel", 25600, 128, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_32_sliding_window_causal_ldgsts_sm90_kernel", 25600, 128, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_32_causal_ldgsts_sm90_kernel", 25600, 128, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_32_ldgsts_sm90_kernel_nl", 25600, 128, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_32_causal_ldgsts_sm90_kernel_nl", 25600, 128, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 25600, 128, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_32_ldgsts_sm90_kernel", 41984, 128, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_32_sliding_window_causal_ldgsts_sm90_kernel", 41984, 128, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_32_causal_ldgsts_sm90_kernel", 41984, 128, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_32_ldgsts_sm90_kernel_nl", 41984, 128, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_32_causal_ldgsts_sm90_kernel_nl", 41984, 128, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 41984, 128, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_64_ldgsts_sm90_kernel", 33792, 128, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_64_sliding_window_causal_ldgsts_sm90_kernel", 33792, 128, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_64_causal_ldgsts_sm90_kernel", 33792, 128, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_64_ldgsts_sm90_kernel_nl", 33792, 128, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_64_causal_ldgsts_sm90_kernel_nl", 33792, 128, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_64_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 33792, 128, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_64_ldgsts_sm90_kernel", 50176, 128, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_64_sliding_window_causal_ldgsts_sm90_kernel", 50176, 128, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_64_causal_ldgsts_sm90_kernel", 50176, 128, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_64_ldgsts_sm90_kernel_nl", 50176, 128, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_64_causal_ldgsts_sm90_kernel_nl", 50176, 128, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_128_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 50176, 128, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_64_ldgsts_sm90_kernel", 82944, 128, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_64_sliding_window_causal_ldgsts_sm90_kernel", 82944, 128, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_64_causal_ldgsts_sm90_kernel", 82944, 128, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_64_ldgsts_sm90_kernel_nl", 82944, 128, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_64_causal_ldgsts_sm90_kernel_nl", 82944, 128, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_256_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 82944, 128, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_32_ldgsts_sm90_kernel", 67072, 256, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_32_sliding_window_causal_ldgsts_sm90_kernel", 67072, 256, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_32_causal_ldgsts_sm90_kernel", 67072, 256, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_32_ldgsts_sm90_kernel_nl", 67072, 256, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_32_causal_ldgsts_sm90_kernel_nl", 67072, 256, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 67072, 256, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_32_ldgsts_sm90_kernel", 83456, 256, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_32_sliding_window_causal_ldgsts_sm90_kernel", 83456, 256, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_32_causal_ldgsts_sm90_kernel", 83456, 256, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_32_ldgsts_sm90_kernel_nl", 83456, 256, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_32_causal_ldgsts_sm90_kernel_nl", 83456, 256, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 83456, 256, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_64_ldgsts_sm90_kernel", 132608, 256, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_64_sliding_window_causal_ldgsts_sm90_kernel", 132608, 256, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_64_causal_ldgsts_sm90_kernel", 132608, 256, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_64_ldgsts_sm90_kernel_nl", 132608, 256, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_64_causal_ldgsts_sm90_kernel_nl", 132608, 256, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_384_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 132608, 256, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_64_ldgsts_sm90_kernel", 165376, 256, 0, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_64_sliding_window_causal_ldgsts_sm90_kernel", 165376, 256, 0, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_64_causal_ldgsts_sm90_kernel", 165376, 256, 0, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_64_ldgsts_sm90_kernel_nl", 165376, 256, 64, 0, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_64_causal_ldgsts_sm90_kernel_nl", 165376, 256, 64, 1, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_512_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 165376, 256, 64, 2, 0, false, false, false, false, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_32_ldgsts_sm90_kernel", 17408, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_32_sliding_window_causal_ldgsts_sm90_kernel", 17408, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_32_causal_ldgsts_sm90_kernel", 17408, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_32_ldgsts_sm90_kernel_nl", 17408, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_32_causal_ldgsts_sm90_kernel_nl", 17408, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 17408, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_32_ldgsts_sm90_kernel", 25600, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_32_sliding_window_causal_ldgsts_sm90_kernel", 25600, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_32_causal_ldgsts_sm90_kernel", 25600, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_32_ldgsts_sm90_kernel_nl", 25600, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_32_causal_ldgsts_sm90_kernel_nl", 25600, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 25600, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_32_ldgsts_sm90_kernel", 41984, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_32_sliding_window_causal_ldgsts_sm90_kernel", 41984, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_32_causal_ldgsts_sm90_kernel", 41984, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_32_ldgsts_sm90_kernel_nl", 41984, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_32_causal_ldgsts_sm90_kernel_nl", 41984, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 41984, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_64_ldgsts_sm90_kernel", 33792, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_64_sliding_window_causal_ldgsts_sm90_kernel", 33792, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_64_causal_ldgsts_sm90_kernel", 33792, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_64_ldgsts_sm90_kernel_nl", 33792, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_64_causal_ldgsts_sm90_kernel_nl", 33792, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_64_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 33792, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_64_ldgsts_sm90_kernel", 50176, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_64_sliding_window_causal_ldgsts_sm90_kernel", 50176, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_64_causal_ldgsts_sm90_kernel", 50176, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_64_ldgsts_sm90_kernel_nl", 50176, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_64_causal_ldgsts_sm90_kernel_nl", 50176, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_128_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 50176, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_64_ldgsts_sm90_kernel", 82944, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_64_sliding_window_causal_ldgsts_sm90_kernel", 82944, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_64_causal_ldgsts_sm90_kernel", 82944, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_64_ldgsts_sm90_kernel_nl", 82944, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_64_causal_ldgsts_sm90_kernel_nl", 82944, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_256_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 82944, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_32_ldgsts_sm90_kernel", 67072, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_32_sliding_window_causal_ldgsts_sm90_kernel", 67072, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_32_causal_ldgsts_sm90_kernel", 67072, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_32_ldgsts_sm90_kernel_nl", 67072, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_32_causal_ldgsts_sm90_kernel_nl", 67072, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 67072, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_32_ldgsts_sm90_kernel", 83456, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_32_sliding_window_causal_ldgsts_sm90_kernel", 83456, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_32_causal_ldgsts_sm90_kernel", 83456, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_32_ldgsts_sm90_kernel_nl", 83456, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_32_causal_ldgsts_sm90_kernel_nl", 83456, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 83456, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_64_ldgsts_sm90_kernel", 132608, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_64_sliding_window_causal_ldgsts_sm90_kernel", 132608, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_64_causal_ldgsts_sm90_kernel", 132608, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_64_ldgsts_sm90_kernel_nl", 132608, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_64_causal_ldgsts_sm90_kernel_nl", 132608, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_384_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 132608, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_64_ldgsts_sm90_kernel", 165376, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_64_sliding_window_causal_ldgsts_sm90_kernel", 165376, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_64_causal_ldgsts_sm90_kernel", 165376, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_64_ldgsts_sm90_kernel_nl", 165376, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_64_causal_ldgsts_sm90_kernel_nl", 165376, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_BF16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_bf16_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_bf16_512_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 165376, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_32_ldgsts_sm90_kernel", 17408, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_32_sliding_window_causal_ldgsts_sm90_kernel", 17408, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_32_causal_ldgsts_sm90_kernel", 17408, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_32_ldgsts_sm90_kernel_nl", 17408, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_32_causal_ldgsts_sm90_kernel_nl", 17408, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 17408, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_32_ldgsts_sm90_kernel", 25600, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_32_sliding_window_causal_ldgsts_sm90_kernel", 25600, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_32_causal_ldgsts_sm90_kernel", 25600, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_32_ldgsts_sm90_kernel_nl", 25600, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_32_causal_ldgsts_sm90_kernel_nl", 25600, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 25600, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_32_ldgsts_sm90_kernel", 41984, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_32_sliding_window_causal_ldgsts_sm90_kernel", 41984, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_32_causal_ldgsts_sm90_kernel", 41984, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_32_ldgsts_sm90_kernel_nl", 41984, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_32_causal_ldgsts_sm90_kernel_nl", 41984, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 41984, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_64_ldgsts_sm90_kernel", 33792, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_64_sliding_window_causal_ldgsts_sm90_kernel", 33792, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_64_causal_ldgsts_sm90_kernel", 33792, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_64_ldgsts_sm90_kernel_nl", 33792, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_64_causal_ldgsts_sm90_kernel_nl", 33792, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 64, 64, 64, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_64_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_64_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 33792, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_64_ldgsts_sm90_kernel", 50176, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_64_sliding_window_causal_ldgsts_sm90_kernel", 50176, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_64_causal_ldgsts_sm90_kernel", 50176, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_64_ldgsts_sm90_kernel_nl", 50176, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_64_causal_ldgsts_sm90_kernel_nl", 50176, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 128, 64, 128, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_128_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_128_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 50176, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_64_ldgsts_sm90_kernel", 82944, 128, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_64_sliding_window_causal_ldgsts_sm90_kernel", 82944, 128, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_64_causal_ldgsts_sm90_kernel", 82944, 128, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_64_ldgsts_sm90_kernel_nl", 82944, 128, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_64_causal_ldgsts_sm90_kernel_nl", 82944, 128, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 256, 64, 256, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_256_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_256_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 82944, 128, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_32_ldgsts_sm90_kernel", 67072, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_32_sliding_window_causal_ldgsts_sm90_kernel", 67072, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_32_causal_ldgsts_sm90_kernel", 67072, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_32_ldgsts_sm90_kernel_nl", 67072, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_32_causal_ldgsts_sm90_kernel_nl", 67072, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 67072, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_32_ldgsts_sm90_kernel", 83456, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_32_sliding_window_causal_ldgsts_sm90_kernel", 83456, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_32_causal_ldgsts_sm90_kernel", 83456, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_32_ldgsts_sm90_kernel_nl", 83456, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_32_causal_ldgsts_sm90_kernel_nl", 83456, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 32, kSM_90,  cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_32_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_32_sliding_window_causal_ldgsts_sm90_kernel_nl", 83456, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_64_ldgsts_sm90_kernel", 132608, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_64_sliding_window_causal_ldgsts_sm90_kernel", 132608, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_64_causal_ldgsts_sm90_kernel", 132608, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_64_ldgsts_sm90_kernel_nl", 132608, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_64_causal_ldgsts_sm90_kernel_nl", 132608, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 384, 64, 384, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_384_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_384_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 132608, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_64_ldgsts_sm90_kernel", 165376, 256, 0, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_64_sliding_window_causal_ldgsts_sm90_kernel", 165376, 256, 0, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_64_causal_ldgsts_sm90_kernel", 165376, 256, 0, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_64_ldgsts_sm90_kernel_nl", 165376, 256, 64, 0, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_64_causal_ldgsts_sm90_kernel_nl", 165376, 256, 64, 1, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 512, 64, 512, 64, kSM_90,  cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin, cubin_fmha_v2_fp16_fp32_512_64_ldgsts_sm90_cu_cubin_len, "fmha_v2_fp16_fp32_512_64_sliding_window_causal_ldgsts_sm90_kernel_nl", 165376, 256, 64, 2, 0, false, false, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_32_causal_tma_ws_sm90_kernel", 73984, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_32_sliding_window_causal_tma_ws_sm90_kernel", 73984, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_40_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_40_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_40_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_48_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_48_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_48_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_64_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_64_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_160_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_160_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_192_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_192_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 1, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_kv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 1, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 1, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_kv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 1, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_kernel", 164096, 384, 64, 0, 1, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 1, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 0, 1, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 1, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_causal_tma_ws_sm90_kernel", 73984, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_sliding_window_causal_tma_ws_sm90_kernel", 73984, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_kernel", 164096, 384, 64, 0, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, false, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 0, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, false, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_32_causal_alibi_tma_ws_sm90_kernel", 73984, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_40_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_48_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_qkv_64_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_160_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_192_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_32_causal_alibi_tma_ws_sm90_kernel", 73984, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_40_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_48_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_256_S_q_paged_kv_64_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_160_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_192_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, false, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_32_causal_tma_ws_sm90_kernel", 73984, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_32_sliding_window_causal_tma_ws_sm90_kernel", 73984, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_40_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_40_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_40_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_48_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_48_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_48_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_64_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_64_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_160_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_160_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_192_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_192_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_kv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_kv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_kernel", 164096, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 0, 1, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 1, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_causal_tma_ws_sm90_kernel", 73984, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_sliding_window_causal_tma_ws_sm90_kernel", 73984, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_kernel", 164096, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 0, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_32_causal_alibi_tma_ws_sm90_kernel", 73984, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_40_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_48_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_qkv_64_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_160_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_192_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_32_causal_alibi_tma_ws_sm90_kernel", 73984, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_40_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_48_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_256_S_q_paged_kv_64_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_160_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_192_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_kernel", 82304, 384, 64, 0, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_causal_tma_ws_sm90_kernel", 82304, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_sliding_window_causal_tma_ws_sm90_kernel", 78208, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_custom_mask_tma_ws_sm90_kernel", 82304, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_causal_tma_ws_sm90_kernel", 164224, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_sliding_window_causal_tma_ws_sm90_kernel", 156032, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_causal_tma_ws_sm90_kernel", 164224, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_sliding_window_causal_tma_ws_sm90_kernel", 156032, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_kernel", 164224, 384, 64, 0, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_causal_tma_ws_sm90_kernel", 164224, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_sliding_window_causal_tma_ws_sm90_kernel", 156032, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_causal_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_custom_mask_tma_ws_sm90_kernel", 229632, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_causal_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_custom_mask_tma_ws_sm90_kernel", 229632, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_causal_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_custom_mask_tma_ws_sm90_kernel", 229632, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 180480, 384, 64, 2, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_causal_qk_tanh_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 229632, 384, 64, 3, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_kernel", 82304, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_32_custom_mask_tma_ws_sm90_kernel", 82304, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_kernel", 164224, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_64_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_kernel", 196864, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 0, 1, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 1, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_kernel", 82304, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_causal_tma_ws_sm90_kernel", 82304, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_sliding_window_causal_tma_ws_sm90_kernel", 78208, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_custom_mask_tma_ws_sm90_kernel", 82304, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_causal_tma_ws_sm90_kernel", 164224, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_sliding_window_causal_tma_ws_sm90_kernel", 156032, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_causal_tma_ws_sm90_kernel", 164224, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_sliding_window_causal_tma_ws_sm90_kernel", 156032, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_kernel", 164224, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_causal_tma_ws_sm90_kernel", 164224, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_sliding_window_causal_tma_ws_sm90_kernel", 156032, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_custom_mask_tma_ws_sm90_kernel", 164224, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_kernel", 196864, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_sliding_window_causal_tma_ws_sm90_kernel", 180480, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_causal_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_custom_mask_tma_ws_sm90_kernel", 229632, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_causal_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_custom_mask_tma_ws_sm90_kernel", 229632, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_causal_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_custom_mask_tma_ws_sm90_kernel", 229632, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_causal_qk_tanh_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_32_causal_alibi_tma_ws_sm90_kernel", 82304, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_40_causal_alibi_tma_ws_sm90_kernel", 164224, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_48_causal_alibi_tma_ws_sm90_kernel", 164224, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_64_causal_alibi_tma_ws_sm90_kernel", 164224, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_80_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_96_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_104_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_160_causal_alibi_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_192_causal_alibi_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_causal_alibi_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_qkv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, true},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_qkv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 229632, 384, 64, 1, 0, false, true, true, true, true, false, true},
{ DATA_TYPE_E4M3, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_32_causal_alibi_tma_ws_sm90_kernel", 82304, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_40_causal_alibi_tma_ws_sm90_kernel", 164224, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_48_causal_alibi_tma_ws_sm90_kernel", 164224, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_64_causal_alibi_tma_ws_sm90_kernel", 164224, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 80, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_80_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 96, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_96_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 104, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_104_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_160_causal_alibi_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_192_causal_alibi_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_causal_alibi_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 256, 128, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_256_S_q_paged_kv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, true},
{ DATA_TYPE_E4M3, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_e4m3_64_128_S_q_paged_kv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 229632, 384, 64, 1, 2, false, true, true, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_causal_tma_ws_sm90_kernel", 73984, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_sliding_window_causal_tma_ws_sm90_kernel", 73984, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 2, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 0, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_kv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_kernel", 164096, 384, 64, 0, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 1, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 0, 1, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 1, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_kernel", 73984, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_causal_tma_ws_sm90_kernel", 73984, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_sliding_window_causal_tma_ws_sm90_kernel", 73984, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_custom_mask_tma_ws_sm90_kernel", 73984, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_kernel", 147712, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_causal_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_sliding_window_causal_tma_ws_sm90_kernel", 147712, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_custom_mask_tma_ws_sm90_kernel", 147712, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_kernel", 164096, 384, 64, 0, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sliding_window_causal_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_custom_mask_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_causal_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_sliding_window_causal_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_custom_mask_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 0, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 2, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 3, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_sliding_window_causal_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 2, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_custom_mask_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 3, 2, false, true, true, true, false, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_32_causal_alibi_tma_ws_sm90_kernel", 73984, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_40_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_48_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_qkv_64_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_160_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_192_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 0, false, true, true, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 0, false, true, true, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 256, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_32_causal_alibi_tma_ws_sm90_kernel", 73984, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_40_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_48_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 256, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_256_S_q_paged_kv_64_causal_alibi_tma_ws_sm90_kernel", 147712, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_alibi_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_160_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_192_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_causal_alibi_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 164096, 384, 64, 1, 2, false, true, true, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 64, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_alibi_qk_tanh_tma_ws_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_256_causal_alibi_qk_tanh_tma_ws_sm90_kernel", 196864, 384, 64, 1, 2, false, true, true, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_causal_sm90_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sliding_window_causal_sm90_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_custom_mask_sm90_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_causal_sm90_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sliding_window_causal_sm90_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_custom_mask_sm90_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_causal_sm90_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sliding_window_causal_sm90_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_custom_mask_sm90_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_causal_sm90_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sliding_window_causal_sm90_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_custom_mask_sm90_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_causal_sm90_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sliding_window_causal_sm90_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_custom_mask_sm90_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_causal_sm90_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sliding_window_causal_sm90_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_custom_mask_sm90_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_causal_sm90_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sliding_window_causal_sm90_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_custom_mask_sm90_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_causal_sm90_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sliding_window_causal_sm90_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_custom_mask_sm90_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_custom_mask_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_causal_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_custom_mask_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_causal_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_custom_mask_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_causal_sm90_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sliding_window_causal_sm90_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_custom_mask_sm90_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_causal_sm90_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sliding_window_causal_sm90_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_custom_mask_sm90_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sliding_window_causal_sm90_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_custom_mask_sm90_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sliding_window_causal_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_custom_mask_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_causal_sm90_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sliding_window_causal_sm90_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_custom_mask_sm90_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_causal_sm90_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sliding_window_causal_sm90_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_custom_mask_sm90_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_causal_sm90_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sliding_window_causal_sm90_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_custom_mask_sm90_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_causal_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sliding_window_causal_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_custom_mask_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_causal_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sliding_window_causal_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_custom_mask_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_custom_mask_qk_tanh_sm90_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_causal_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_custom_mask_qk_tanh_sm90_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_causal_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_90,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm90_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_custom_mask_qk_tanh_sm90_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, true},
#endif

#ifndef EXCLUDE_SM_89
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_kernel_nl", 12288, 128, 128, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_causal_sm89_kernel_nl", 12288, 128, 128, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 128, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_32_custom_mask_sm89_kernel_nl", 12288, 128, 128, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_kernel_nl", 12288, 128, 128, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 128, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_kernel_nl", 12288, 128, 128, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_causal_sm89_kernel_nl", 12288, 128, 128, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 128, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 128, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_causal_sm89_kernel_nl", 24576, 128, 128, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sliding_window_causal_sm89_kernel_nl", 24576, 128, 128, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_40_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_causal_sm89_kernel_nl", 24576, 128, 128, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl", 24576, 128, 128, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_40_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_causal_sm89_kernel_nl", 24576, 128, 128, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sliding_window_causal_sm89_kernel_nl", 24576, 128, 128, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_48_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_causal_sm89_kernel_nl", 24576, 128, 128, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl", 24576, 128, 128, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_48_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_kernel_nl", 24576, 128, 128, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_causal_sm89_kernel_nl", 24576, 128, 128, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sliding_window_causal_sm89_kernel_nl", 24576, 128, 128, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_qkv_64_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_kernel_nl", 24576, 128, 128, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_kv_64_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_kernel_nl", 24576, 128, 128, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_causal_sm89_kernel_nl", 24576, 128, 128, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl", 24576, 128, 128, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_128_128_S_q_paged_kv_64_custom_mask_sm89_kernel_nl", 24576, 128, 128, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 160, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_causal_sm89_kernel_nl", 65536, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 160, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sliding_window_causal_sm89_kernel_nl", 65536, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 160, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_160_custom_mask_sm89_kernel_nl", 65536, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 160, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_causal_sm89_kernel_nl", 65536, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 160, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl", 65536, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 160, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_160_custom_mask_sm89_kernel_nl", 65536, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 192, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_causal_sm89_kernel_nl", 65536, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 192, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sliding_window_causal_sm89_kernel_nl", 65536, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 192, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_192_custom_mask_sm89_kernel_nl", 65536, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 192, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_causal_sm89_kernel_nl", 65536, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 192, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl", 65536, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 192, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_192_custom_mask_sm89_kernel_nl", 65536, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 256, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_causal_sm89_kernel_nl", 65536, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 256, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sliding_window_causal_sm89_kernel_nl", 65536, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 256, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_qkv_256_custom_mask_sm89_kernel_nl", 65536, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 256, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_causal_sm89_kernel_nl", 65536, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 256, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl", 65536, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_E4M3, 0, 64, 32, 256, kSM_89,  cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_e4m3_fp32_64_32_S_q_paged_kv_256_custom_mask_sm89_kernel_nl", 65536, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sliding_window_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_custom_mask_sm89_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sliding_window_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_causal_sm89_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sliding_window_causal_sm89_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_custom_mask_sm89_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_causal_sm89_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sliding_window_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_custom_mask_sm89_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_causal_sm89_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sliding_window_causal_sm89_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_custom_mask_sm89_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_causal_sm89_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sliding_window_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_custom_mask_sm89_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sliding_window_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_causal_sm89_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sliding_window_causal_sm89_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_custom_mask_sm89_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_causal_sm89_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_custom_mask_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sliding_window_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_custom_mask_sm89_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_causal_sm89_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sliding_window_causal_sm89_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_custom_mask_sm89_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_causal_sm89_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sliding_window_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_custom_mask_sm89_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sliding_window_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_causal_sm89_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sliding_window_causal_sm89_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_custom_mask_sm89_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_causal_sm89_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_causal_sm89_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_causal_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_custom_mask_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sliding_window_causal_sm89_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_custom_mask_sm89_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_custom_mask_sm89_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_custom_mask_sm89_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_custom_mask_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_causal_sm89_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sliding_window_causal_sm89_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_custom_mask_sm89_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_causal_sm89_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sliding_window_causal_sm89_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_custom_mask_sm89_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_causal_sm89_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sliding_window_causal_sm89_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_custom_mask_sm89_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_causal_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sliding_window_causal_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_custom_mask_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_causal_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sliding_window_causal_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_custom_mask_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm89_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm89_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_89,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm89_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm89_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, true},
#endif

#ifndef EXCLUDE_SM_80
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sliding_window_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_custom_mask_sm80_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sliding_window_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_causal_sm80_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sliding_window_causal_sm80_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_custom_mask_sm80_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_causal_sm80_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sliding_window_causal_sm80_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sliding_window_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_custom_mask_sm80_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sliding_window_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_causal_sm80_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sliding_window_causal_sm80_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_custom_mask_sm80_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_causal_sm80_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sliding_window_causal_sm80_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sliding_window_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_custom_mask_sm80_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sliding_window_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_96_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_104_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_160_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_192_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_causal_sm80_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sliding_window_causal_sm80_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_16_custom_mask_sm80_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_causal_sm80_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sliding_window_causal_sm80_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_qkv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_40_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_48_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_80_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_96_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_104_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_160_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_192_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_256_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_qkv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_qkv_256_custom_mask_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_kv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_kv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_kv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_kv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sliding_window_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_16_custom_mask_sm80_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sliding_window_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_40_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_48_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_q_paged_kv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_80_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_96_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_104_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_160_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_192_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_causal_sm80_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sliding_window_causal_sm80_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_16_custom_mask_sm80_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_causal_sm80_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sliding_window_causal_sm80_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_64_S_q_paged_kv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_40_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_48_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_80_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_96_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_104_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_160_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_192_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_BF16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sliding_window_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_16_custom_mask_sm80_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sliding_window_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_40_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_48_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_qkv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_80_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_96_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_104_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_160_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_192_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_causal_sm80_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sliding_window_causal_sm80_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_16_custom_mask_sm80_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_causal_sm80_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sliding_window_causal_sm80_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_qkv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_40_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_48_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_causal_sm80_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_80_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_96_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_104_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_causal_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_160_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_192_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_causal_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_qkv_256_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_qkv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_qkv_256_custom_mask_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_kv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_kv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_kv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_kv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sliding_window_causal_sm80_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_16_custom_mask_sm80_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sliding_window_causal_sm80_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_32_custom_mask_sm80_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_40_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_48_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sliding_window_causal_sm80_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_128_128_S_q_paged_kv_64_custom_mask_sm80_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_80_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_96_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_104_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_160_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_192_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sliding_window_causal_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_custom_mask_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_causal_sm80_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sliding_window_causal_sm80_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_16_custom_mask_sm80_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_causal_sm80_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sliding_window_causal_sm80_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_64_S_q_paged_kv_32_custom_mask_sm80_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_40_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_48_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_causal_sm80_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sliding_window_causal_sm80_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_64_custom_mask_sm80_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_80_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_96_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_104_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_causal_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sliding_window_causal_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_custom_mask_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_160_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_192_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_causal_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sliding_window_causal_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_custom_mask_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm80_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, true, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm80_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, true, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_80,  cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin, cubin_fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_qk_tanh_sm80_cu_cubin_len, "fmha_v2_flash_attention_fp16_fp32_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm80_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, true, true, false, true},
#endif

#ifndef EXCLUDE_SM_86
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_causal_sm86_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sliding_window_causal_sm86_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_16_custom_mask_sm86_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_causal_sm86_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sliding_window_causal_sm86_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_32_custom_mask_sm86_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_40_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_48_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_qkv_64_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_80_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_96_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_104_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_160_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_192_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_causal_sm86_kernel_nl", 6144, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sliding_window_causal_sm86_kernel_nl", 6144, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_16_custom_mask_sm86_kernel_nl", 6144, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_kernel_nl", 12288, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_causal_sm86_kernel_nl", 12288, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sliding_window_causal_sm86_kernel_nl", 12288, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_qkv_32_custom_mask_sm86_kernel_nl", 12288, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_causal_sm86_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sliding_window_causal_sm86_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_40_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_causal_sm86_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sliding_window_causal_sm86_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_48_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_kernel_nl", 16384, 128, 64, 0, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_causal_sm86_kernel_nl", 16384, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sliding_window_causal_sm86_kernel_nl", 16384, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_64_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_causal_sm86_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_80_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_causal_sm86_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_96_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_causal_sm86_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_104_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_sm86_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_causal_sm86_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sliding_window_causal_sm86_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_160_custom_mask_sm86_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_causal_sm86_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sliding_window_causal_sm86_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_192_custom_mask_sm86_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_sm86_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_sm86_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_sm86_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_sliding_window_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_128_custom_mask_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_sliding_window_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_qkv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_qkv_256_custom_mask_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 0, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_causal_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_sliding_window_causal_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_qkv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_qkv_128_custom_mask_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_causal_qk_tanh_sm86_kernel_nl", 49152, 128, 64, 1, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_sliding_window_causal_qk_tanh_sm86_kernel_nl", 49152, 128, 64, 2, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_qkv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_qkv_256_custom_mask_qk_tanh_sm86_kernel_nl", 49152, 128, 64, 3, 0, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_kernel_nl_tiled", 32768, 128, 128, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_32_custom_mask_sm86_kernel_nl_tiled", 32768, 128, 128, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_kernel_nl_tiled", 65536, 128, 128, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_kv_64_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_kernel_nl", 12288, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_kv_32_custom_mask_sm86_kernel_nl", 12288, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_kernel_nl", 16384, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_64_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 0, 1, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_kv_128_custom_mask_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 1, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 0, 1, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_kv_128_custom_mask_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 3, 1, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_causal_sm86_kernel_nl_tiled", 16384, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sliding_window_causal_sm86_kernel_nl_tiled", 16384, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_16_custom_mask_sm86_kernel_nl_tiled", 16384, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_kernel_nl_tiled", 32768, 128, 128, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_causal_sm86_kernel_nl_tiled", 32768, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sliding_window_causal_sm86_kernel_nl_tiled", 32768, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_32_custom_mask_sm86_kernel_nl_tiled", 32768, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_40_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_48_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_kernel_nl_tiled", 65536, 128, 128, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_128_128_S_q_paged_kv_64_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_80_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_96_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_104_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_160_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_192_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_custom_mask_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_causal_sm86_kernel_nl", 6144, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sliding_window_causal_sm86_kernel_nl", 6144, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 16, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_16_custom_mask_sm86_kernel_nl", 6144, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_kernel_nl", 12288, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_causal_sm86_kernel_nl", 12288, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sliding_window_causal_sm86_kernel_nl", 12288, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 64, 32, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_64_S_q_paged_kv_32_custom_mask_sm86_kernel_nl", 12288, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_causal_sm86_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sliding_window_causal_sm86_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 40, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_40_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_causal_sm86_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sliding_window_causal_sm86_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 48, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_48_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_kernel_nl", 16384, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_causal_sm86_kernel_nl", 16384, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sliding_window_causal_sm86_kernel_nl", 16384, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 64, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_64_custom_mask_sm86_kernel_nl", 16384, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_causal_sm86_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 80, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_80_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_causal_sm86_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 96, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_96_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_causal_sm86_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 104, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_104_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_causal_sm86_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sliding_window_causal_sm86_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_custom_mask_sm86_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_causal_sm86_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sliding_window_causal_sm86_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 160, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_160_custom_mask_sm86_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_causal_sm86_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sliding_window_causal_sm86_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 192, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_192_custom_mask_sm86_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_causal_sm86_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sliding_window_causal_sm86_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_custom_mask_sm86_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, false},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 0, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_128_custom_mask_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 128, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_128_S_q_paged_kv_256_custom_mask_qk_tanh_sm86_kernel_nl_tiled", 81920, 128, 64, 3, 2, false, true, false, false, true, true, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 0, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_causal_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 1, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_sliding_window_causal_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 2, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 32, 128, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_32_S_q_paged_kv_128_custom_mask_qk_tanh_sm86_kernel_nl", 32768, 128, 64, 3, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_causal_qk_tanh_sm86_kernel_nl", 49152, 128, 64, 1, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_sliding_window_causal_qk_tanh_sm86_kernel_nl", 49152, 128, 64, 2, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_FP16, 0, 64, 16, 256, kSM_86,  cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin, cubin_fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_qk_tanh_sm86_cu_cubin_len, "fmha_v2_flash_attention_fp16_64_16_S_q_paged_kv_256_custom_mask_qk_tanh_sm86_kernel_nl", 49152, 128, 64, 3, 2, false, true, false, false, true, false, true},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_causal_sm86_kernel_nl_tiled", 16384, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sliding_window_causal_sm86_kernel_nl_tiled", 16384, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 16, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_16_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_16_custom_mask_sm86_kernel_nl_tiled", 16384, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_kernel_nl_tiled", 32768, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_causal_sm86_kernel_nl_tiled", 32768, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sliding_window_causal_sm86_kernel_nl_tiled", 32768, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 32, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_32_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_32_custom_mask_sm86_kernel_nl_tiled", 32768, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 40, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_40_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_40_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 48, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_48_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_48_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_kernel_nl_tiled", 65536, 128, 128, 0, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sliding_window_causal_sm86_kernel_nl_tiled", 65536, 128, 128, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 128, 128, 64, kSM_86,  cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_128_128_S_qkv_64_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_128_128_S_qkv_64_custom_mask_sm86_kernel_nl_tiled", 65536, 128, 128, 3, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 1, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm86_cu_cubin, cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sm86_cu_cubin_len, "fmha_v2_flash_attention_bf16_64_128_S_qkv_80_sliding_window_causal_sm86_kernel_nl_tiled", 81920, 128, 64, 2, 0, false, true, false, true, true, true, false},
{ DATA_TYPE_BF16, 0, 64, 128, 80, kSM_86,  cubin_fmha_v2_flash_attention_bf16_64_128_S_qkv_