################################################################################
# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
# runtime/src/iree/builtins/ukernel/arch/x86_64/BUILD.bazel                    #
#                                                                              #
# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
# CMake-only content.                                                          #
#                                                                              #
# To disable autogeneration for this file entirely, delete this header.        #
################################################################################

# Project helper defined elsewhere; presumably adds every subdirectory of this
# directory that has its own CMakeLists.txt — confirm against its definition.
iree_add_all_subdirs()

# Sets _IREE_UKERNEL_BITCODE_BUILD_X86_64. Presumably true when the IREE
# compiler being built can target x86_64 — confirm against the helper.
iree_compiler_targeting_iree_arch(_IREE_UKERNEL_BITCODE_BUILD_X86_64 "x86_64")
if(_IREE_UKERNEL_BITCODE_BUILD_X86_64)

# Bitcode for the x86_64 entry points (mmt4d, pack, unpack). No COPTS are
# passed here, unlike the feature-specific libraries below.
iree_bitcode_library(
  NAME
    ukernel_bitcode_arch_x86_64_entry_points
  ARCH
    x86_64
  INTERNAL_HDRS
    "${PROJECT_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
    "${PROJECT_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
    "common_x86_64.h"
    "mmt4d_x86_64_internal.h"
    "mmt4d_x86_64_tiles.inl"
    "pack_x86_64_internal.h"
    "unpack_x86_64_internal.h"
  SRCS
    "mmt4d_x86_64_entry_point.c"
    "pack_x86_64_entry_point.c"
    "unpack_x86_64_entry_point.c"
)

# Bitcode for the AVX2+FMA variants of mmt4d, pack and unpack, compiled with
# -mavx, -mavx2, -mfma and -mf16c.
iree_bitcode_library(
  NAME
    ukernel_bitcode_arch_x86_64_avx2_fma
  ARCH
    x86_64
  INTERNAL_HDRS
    "${PROJECT_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
    "${PROJECT_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
    "common_x86_64.h"
    "mmt4d_x86_64_internal.h"
    "mmt4d_x86_64_tiles.inl"
    "pack_x86_64_internal.h"
    "unpack_x86_64_internal.h"
  SRCS
    "mmt4d_x86_64_avx2_fma.c"
    "pack_x86_64_avx2_fma.c"
    "unpack_x86_64_avx2_fma.c"
  COPTS
    "-mavx"
    "-mavx2"
    "-mfma"
    "-mf16c"
)

# Bitcode for the baseline AVX-512 variants of mmt4d, pack and unpack. Uses the
# AVX2+FMA flag set plus the AVX-512 F/VL/CD/BW/DQ flags.
iree_bitcode_library(
  NAME
    ukernel_bitcode_arch_x86_64_avx512_base
  ARCH
    x86_64
  INTERNAL_HDRS
    "${PROJECT_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
    "${PROJECT_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
    "common_x86_64.h"
    "mmt4d_x86_64_internal.h"
    "mmt4d_x86_64_tiles.inl"
    "pack_x86_64_internal.h"
    "unpack_x86_64_internal.h"
  SRCS
    "mmt4d_x86_64_avx512_base.c"
    "pack_x86_64_avx512_base.c"
    "unpack_x86_64_avx512_base.c"
  COPTS
    "-mavx"
    "-mavx2"
    "-mfma"
    "-mf16c"
    "-mavx512f"
    "-mavx512vl"
    "-mavx512cd"
    "-mavx512bw"
    "-mavx512dq"
)

# Bitcode for the AVX-512-VNNI variant (mmt4d only). Uses the baseline AVX-512
# flag set plus -mavx512vnni.
iree_bitcode_library(
  NAME
    ukernel_bitcode_arch_x86_64_avx512_vnni
  ARCH
    x86_64
  INTERNAL_HDRS
    "${PROJECT_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
    "${PROJECT_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
    "common_x86_64.h"
    "mmt4d_x86_64_internal.h"
    "mmt4d_x86_64_tiles.inl"
    "pack_x86_64_internal.h"
    "unpack_x86_64_internal.h"
  SRCS
    "mmt4d_x86_64_avx512_vnni.c"
  COPTS
    "-mavx"
    "-mavx2"
    "-mfma"
    "-mf16c"
    "-mavx512f"
    "-mavx512vl"
    "-mavx512cd"
    "-mavx512bw"
    "-mavx512dq"
    "-mavx512vnni"
)

# Bitcode for the AVX-512-BF16 variant (mmt4d only). Uses the baseline AVX-512
# flag set plus -mavx512bf16.
iree_bitcode_library(
  NAME
    ukernel_bitcode_arch_x86_64_avx512_bf16
  ARCH
    x86_64
  INTERNAL_HDRS
    "${PROJECT_BINARY_DIR}/runtime/src/iree/builtins/ukernel/internal_headers_filegroup.stamp"
    "${PROJECT_BINARY_DIR}/runtime/src/iree/schemas/cpu_data_headers_filegroup.stamp"
    "common_x86_64.h"
    "mmt4d_x86_64_internal.h"
    "mmt4d_x86_64_tiles.inl"
    "pack_x86_64_internal.h"
    "unpack_x86_64_internal.h"
  SRCS
    "mmt4d_x86_64_avx512_bf16.c"
  COPTS
    "-mavx"
    "-mavx2"
    "-mfma"
    "-mf16c"
    "-mavx512f"
    "-mavx512vl"
    "-mavx512cd"
    "-mavx512bw"
    "-mavx512dq"
    "-mavx512bf16"
)

# Combine the five bitcode files listed in SRCS under the single name
# ukernel_bitcode_arch_x86_64. Presumably this emits
# ukernel_bitcode_arch_x86_64.bc, the file the fallback branch below creates
# when bitcode is not built — confirm against iree_link_bitcode.
iree_link_bitcode(
  NAME
    ukernel_bitcode_arch_x86_64
  SRCS
    "ukernel_bitcode_arch_x86_64_avx2_fma.bc"
    "ukernel_bitcode_arch_x86_64_avx512_base.bc"
    "ukernel_bitcode_arch_x86_64_avx512_bf16.bc"
    "ukernel_bitcode_arch_x86_64_avx512_vnni.bc"
    "ukernel_bitcode_arch_x86_64_entry_points.bc"

)

# Fallback: x86_64 bitcode is not built, but the compiler with the LLVM CPU
# backend is. Create ukernel_bitcode_arch_x86_64.bc in the current binary
# directory via iree_make_empty_file (an empty placeholder, judging by the
# helper's name — confirm).
elseif(IREE_BUILD_COMPILER AND IREE_TARGET_BACKEND_LLVM_CPU)
iree_make_empty_file("${CMAKE_CURRENT_BINARY_DIR}/ukernel_bitcode_arch_x86_64.bc")
endif()  # _IREE_UKERNEL_BITCODE_BUILD_X86_64

### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###

# The rest of this file configures x86_64-specific flags, compiler checks and
# libraries; stop processing it when IREE_ARCH is any other architecture.
if(NOT IREE_ARCH STREQUAL "x86_64")
  return()
endif()

# Compiler flags for the AVX2+FMA3 code path. CPUs with these features include
# Intel Haswell (2013) and newer and AMD Excavator (2015) and newer. There is
# currently no plan to look after SIMD performance on x86 microarchitectures
# that lack them.
iree_select_compiler_opts(IREE_UK_COPTS_X86_64_AVX2_FMA
  CLANG_OR_GCC "-mavx2" "-mfma" "-mf16c"
  MSVC_OR_CLANG_CL "/arch:AVX2"
)

# TODO: Support AVX-VNNI and/or AVX-VNNI-INT8?
# 1. AVX-VNNI is the 256-bit backport of AVX-512-VNNI. It is supported in Alder
#    Lake (2022).
# 2. AVX-VNNI-INT8 is a future ISA extension not yet mentioned in the Intel SDM,
#    but we can glean the following information from these links: it will be
#    supported in Sierra Forest (2024), and it will introduce VPDPBSSD, the
#    missing signed*signed counterpart to VNNI's VPDPBUSD, meaning we will
#    finally be able to use the 8bit path, so that VNNI-INT8 will be a >= 2x win
#    for us over VNNI.
#    https://en.wikipedia.org/wiki/Sierra_Forest
#    https://gcc.gnu.org/pipermail/gcc-patches/2022-October/603546.html
#    https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=406675947d26ccbc2108e9689a2918bb36f61a63


# Compiler flags for the typical baseline AVX-512 feature set (F+VL+CD+BW+DQ).
# CPUs with it include Intel Skylake/server (2017) and AMD Zen4 (2022); recent
# non-server x86 microarchitectures without it continue to exist.
iree_select_compiler_opts(IREE_UK_COPTS_X86_64_AVX512_BASE
  CLANG_OR_GCC
    "-mavx512f" "-mavx512vl" "-mavx512cd" "-mavx512bw" "-mavx512dq"
  MSVC_OR_CLANG_CL "/arch:AVX512"
)

# Compiler flags for AVX-512-VNNI. CPUs with it include Intel Cascade Lake
# (2019) and newer, and AMD Zen4 (2022).
#
# The *_RELATIVE variable holds only the flag added on top of the AVX-512
# baseline; note that no entry is given for plain MSVC.
iree_select_compiler_opts(IREE_UK_COPTS_X86_64_AVX512_VNNI_RELATIVE
  CLANG_OR_GCC "-mavx512vnni"
  CLANG_CL "/clang:-mavx512vnni"
)
# Full flag list: the baseline flags followed by the VNNI-specific one.
set(IREE_UK_COPTS_X86_64_AVX512_VNNI
  "${IREE_UK_COPTS_X86_64_AVX512_BASE};${IREE_UK_COPTS_X86_64_AVX512_VNNI_RELATIVE}")

# Compiler flags for AVX-512-BF16. CPUs with it include Intel Cooper Lake
# (2020) and newer, and AMD Zen4 (2022).
#
# The *_RELATIVE variable holds only the flag added on top of the AVX-512
# baseline; note that no entry is given for plain MSVC.
iree_select_compiler_opts(IREE_UK_COPTS_X86_64_AVX512_BF16_RELATIVE
  CLANG_OR_GCC "-mavx512bf16"
  CLANG_CL "/clang:-mavx512bf16"
)
# Full flag list: the baseline flags followed by the BF16-specific one.
set(IREE_UK_COPTS_X86_64_AVX512_BF16
  "${IREE_UK_COPTS_X86_64_AVX512_BASE};${IREE_UK_COPTS_X86_64_AVX512_BF16_RELATIVE}")

# Every CPU feature starts out as "try": compiler support for it will be
# checked unless a later step switches its IREE_UK_TRY_X86_64_* variable OFF.
foreach(_IREE_UK_FEATURE IN ITEMS AVX2_FMA AVX512_BASE AVX512_VNNI AVX512_BF16)
  set("IREE_UK_TRY_X86_64_${_IREE_UK_FEATURE}" ON)
endforeach()

# On some compilers, we don't even want to try checking compiler support for
# features that we know are not working. Often, a compiler supports a flag but
# there is a bug in its support for that feature. In the case of MSVC, the
# /arch: flag is often too coarse-grained to be meaningful.

# GCC version check.
# Old GCC versions have incompatible x86 intrinsics, and supporting them is not
# considered worth it. At least GCC 9 is known to be problematic, while GCC 12
# is known to work, so the threshold is based on that. Switching the
# IREE_UK_TRY_* variables OFF leaves the corresponding code paths out.
#
# "GNU" is quoted so that it is always compared as a literal string (policy
# CMP0054) rather than being dereferenced if a variable named GNU is defined.
if((CMAKE_C_COMPILER_ID STREQUAL "GNU") AND
   (CMAKE_C_COMPILER_VERSION VERSION_LESS 12))
  set(IREE_UK_TRY_X86_64_AVX2_FMA OFF)
  set(IREE_UK_TRY_X86_64_AVX512_BASE OFF)
  set(IREE_UK_TRY_X86_64_AVX512_VNNI OFF)
  set(IREE_UK_TRY_X86_64_AVX512_BF16 OFF)
endif()  # GCC version check

# AVX-512-BF16 is not attempted when MSVC_VERSION is below 1937: the
# _mm512_cvtpbh_ps intrinsic was found missing at _MSC_VER=1930.
if(MSVC_VERSION AND "${MSVC_VERSION}" VERSION_LESS 1937)
  set(IREE_UK_TRY_X86_64_AVX512_BF16 OFF)
endif()

# clang-cl (Clang simulating MSVC) older than version 18: skip AVX-512-BF16.
# Versions 16-17 crash when compiling the file, and the inline asm workaround
# cannot be used with clang-cl:
# https://github.com/llvm/llvm-project/issues/68117.
if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND
   CMAKE_C_SIMULATE_ID MATCHES "MSVC" AND
   CMAKE_C_COMPILER_VERSION VERSION_LESS 18)
  set(IREE_UK_TRY_X86_64_AVX512_BF16 OFF)
endif()

# Compiler support checks for the features still marked IREE_UK_TRY_*.
# Some instructions are not available with baseline arch flags and have to be
# tested by compiling a snippet. Those snippets use intrinsics from the kernels
# whose presence indicates that all required intrinsics are available, instead
# of testing each intrinsic separately.

# AVX2+FMA: only requires that the compiler accepts the flags.
if(NOT IREE_UK_TRY_X86_64_AVX2_FMA)
  set(IREE_UK_BUILD_X86_64_AVX2_FMA OFF)
else()
  check_cxx_compiler_flag(
    "${IREE_UK_COPTS_X86_64_AVX2_FMA}" IREE_UK_BUILD_X86_64_AVX2_FMA)
endif()

# Baseline AVX-512: only requires that the compiler accepts the flags.
if(NOT IREE_UK_TRY_X86_64_AVX512_BASE)
  set(IREE_UK_BUILD_X86_64_AVX512_BASE OFF)
else()
  check_cxx_compiler_flag(
    "${IREE_UK_COPTS_X86_64_AVX512_BASE}" IREE_UK_BUILD_X86_64_AVX512_BASE)
endif()

# AVX-512-VNNI: enabled only if a snippet calling _mm512_dpwssd_epi32 compiles
# with the VNNI flag set.
if(NOT IREE_UK_TRY_X86_64_AVX512_VNNI)
  set(IREE_UK_BUILD_X86_64_AVX512_VNNI OFF)
else()
  string(CONCAT IREE_UK_BUILD_X86_64_AVX512_VNNI_TEST
    "#include <immintrin.h>\n"
    "int main() {\n"
    "  __m512i a, b, d;\n"
    "  _mm512_dpwssd_epi32(d, a, b);\n"
    "  return 0;\n"
    "}"
  )
  # CMAKE_REQUIRED_FLAGS takes a space-separated string, so flatten the list.
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${IREE_UK_COPTS_X86_64_AVX512_VNNI}")
  check_c_source_compiles(
    "${IREE_UK_BUILD_X86_64_AVX512_VNNI_TEST}" IREE_UK_BUILD_X86_64_AVX512_VNNI)
  # Do not let these flags affect any later check.
  unset(CMAKE_REQUIRED_FLAGS)
endif()

# AVX-512-BF16: enabled only if a snippet calling _mm512_cvtneps_pbh compiles
# with the BF16 flag set.
if(NOT IREE_UK_TRY_X86_64_AVX512_BF16)
  set(IREE_UK_BUILD_X86_64_AVX512_BF16 OFF)
else()
  string(CONCAT IREE_UK_BUILD_X86_64_AVX512_BF16_TEST
    "#include <immintrin.h>\n"
    "int main() {\n"
    "  __m512 a;\n"
    "  __m256bh b = _mm512_cvtneps_pbh(a);\n"
    "  return 0;\n"
    "}"
  )
  # CMAKE_REQUIRED_FLAGS takes a space-separated string, so flatten the list.
  string(REPLACE ";" " " CMAKE_REQUIRED_FLAGS "${IREE_UK_COPTS_X86_64_AVX512_BF16}")
  check_c_source_compiles(
    "${IREE_UK_BUILD_X86_64_AVX512_BF16_TEST}" IREE_UK_BUILD_X86_64_AVX512_BF16)
  # Do not let these flags affect any later check.
  unset(CMAKE_REQUIRED_FLAGS)
endif()

# Generate config_x86_64.h in the build tree from its template in the source
# tree. This must stay after every IREE_UK_BUILD_X86_64_* variable has been set.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/config_x86_64.h.in"
  "${CMAKE_CURRENT_BINARY_DIR}/config_x86_64.h"
)

# Library exposing common_x86_64.h together with the header dependencies
# listed under DEPS.
iree_cc_library(
  NAME common_x86_64
  HDRS "common_x86_64.h"
  DEPS
    iree::builtins::ukernel::internal_headers
    iree::schemas::cpu_data
)

# Feature-specific library targets are appended to this list below, each only
# when its IREE_UK_BUILD_X86_64_* check passed. It starts out empty.
set(IREE_UK_X86_64_DEPS "")

# AVX2+FMA kernels (mmt4d, pack, unpack). Built and added to the dependency
# list only when IREE_UK_BUILD_X86_64_AVX2_FMA is true.
if(IREE_UK_BUILD_X86_64_AVX2_FMA)
  iree_cc_library(
    NAME x86_64_avx2_fma
    SRCS
      "mmt4d_x86_64_avx2_fma.c"
      "pack_x86_64_avx2_fma.c"
      "unpack_x86_64_avx2_fma.c"
    COPTS "${IREE_UK_COPTS_X86_64_AVX2_FMA}"
    DEPS iree::builtins::ukernel::internal_headers
  )
  list(APPEND IREE_UK_X86_64_DEPS "::x86_64_avx2_fma")
endif()

# Baseline AVX-512 kernels (mmt4d, pack, unpack). Built and added to the
# dependency list only when IREE_UK_BUILD_X86_64_AVX512_BASE is true.
if(IREE_UK_BUILD_X86_64_AVX512_BASE)
  iree_cc_library(
    NAME x86_64_avx512_base
    SRCS
      "mmt4d_x86_64_avx512_base.c"
      "pack_x86_64_avx512_base.c"
      "unpack_x86_64_avx512_base.c"
    COPTS "${IREE_UK_COPTS_X86_64_AVX512_BASE}"
    DEPS iree::builtins::ukernel::internal_headers
  )
  list(APPEND IREE_UK_X86_64_DEPS "::x86_64_avx512_base")
endif()

# AVX-512-VNNI kernel (mmt4d only). Built and added to the dependency list only
# when IREE_UK_BUILD_X86_64_AVX512_VNNI is true.
if(IREE_UK_BUILD_X86_64_AVX512_VNNI)
  iree_cc_library(
    NAME x86_64_avx512_vnni
    SRCS "mmt4d_x86_64_avx512_vnni.c"
    COPTS "${IREE_UK_COPTS_X86_64_AVX512_VNNI}"
    DEPS iree::builtins::ukernel::internal_headers
  )
  list(APPEND IREE_UK_X86_64_DEPS "::x86_64_avx512_vnni")
endif()

# AVX-512-BF16 kernel (mmt4d only). Built and added to the dependency list only
# when IREE_UK_BUILD_X86_64_AVX512_BF16 is true.
if(IREE_UK_BUILD_X86_64_AVX512_BF16)
  iree_cc_library(
    NAME x86_64_avx512_bf16
    SRCS "mmt4d_x86_64_avx512_bf16.c"
    COPTS "${IREE_UK_COPTS_X86_64_AVX512_BF16}"
    DEPS iree::builtins::ukernel::internal_headers
  )
  list(APPEND IREE_UK_X86_64_DEPS "::x86_64_avx512_bf16")
endif()

# Library holding the x86_64 entry points. Besides the common headers it
# depends on whichever feature-specific libraries were collected in
# IREE_UK_X86_64_DEPS.
iree_cc_library(
  NAME x86_64
  SRCS
    "mmt4d_x86_64_entry_point.c"
    "pack_x86_64_entry_point.c"
    "query_tile_sizes_x86_64_entry_point.c"
    "unpack_x86_64_entry_point.c"
  DEPS
    ::common_x86_64
    iree::base::core_headers
    iree::builtins::ukernel::internal_headers
    ${IREE_UK_X86_64_DEPS}
  PUBLIC
)

# Hand the name of the x86_64 library to the parent directory scope through
# IREE_UK_ARCH_DEPS (PARENT_SCOPE: the variable is not set in this scope).
set(IREE_UK_ARCH_DEPS "iree::builtins::ukernel::arch::x86_64" PARENT_SCOPE)
