diff --git a/compiler/plugins/target/ROCM/test/gpu_encoding_attrs.mlir b/compiler/plugins/target/ROCM/test/gpu_encoding_attrs.mlir index de4b171bf39b..8d55d5ff65c4 100644 --- a/compiler/plugins/target/ROCM/test/gpu_encoding_attrs.mlir +++ b/compiler/plugins/target/ROCM/test/gpu_encoding_attrs.mlir @@ -11,7 +11,7 @@ // RUN: --iree-hip-target=gfx90a --iree-hip-encoding-layout-resolver=none %s | FileCheck %s --check-prefix=NONE // PAD: #hal.executable.target<"rocm" -// PAD-SAME: iree.encoding.resolver = #iree_gpu.gpu_pad_layout +// PAD-SAME: iree.encoding.resolver = #iree_gpu.gpu_pad_layout<> // DATA-TILING: #hal.executable.target<"rocm" // DATA-TILING-SAME: iree.encoding.resolver = #iree_gpu.gpu_encoding_layout<> diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td index a5b102412af8..cd233c86527c 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.td @@ -322,8 +322,8 @@ def IREEGPU_GPUPadLayoutAttr : AttrDef { let parameters = (ins // Relevant target properties that will later allow us to decide the // serialized pad layout. - "uint32_t":$cache_line_bytes, - "uint32_t":$cache_sets + OptionalParameter<"std::optional">:$cache_line_bytes, + OptionalParameter<"std::optional">:$cache_sets ); } diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp index 677d6665a63c..106c9d2103d6 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.cpp @@ -740,6 +740,14 @@ std::optional getAndroidProfileDetails(StringRef target) { // Query functions //===----------------------------------------------------------------------===// +std::optional getL1CacheInfo(TargetAttr target) { + // TODO(kuhar): Add L1 cache query for other HIP targets. + if (!target || !llvm::is_contained({"gfx90a", "gfx942"}, target.getArch())) { + return std::nullopt; + } + return L1CacheInfo{/*cacheLineBytes=*/128, /*cacheSets=*/4}; +} + TargetAttr getMetalTargetDetails(MLIRContext *context) { return createTargetAttr(*getAppleTargetDetails(), /*arch=*/"apple", /*features=*/"spirv:v1.3,cap:Shader", context); @@ -775,12 +783,10 @@ Attribute getHIPTargetEncodingLayoutAttr(TargetAttr target, return IREE::GPU::GPUEncodingLayoutAttr::get(target.getContext(), {}); } - // GPUPadLayoutAttr is only enabled for CDNA2 and CDNA3 for the time being. - // TODO(kuhar): Enable for other HIP targets. - if (resolver == kPadEncodingLayoutResolverName && - llvm::is_contained({"gfx90a", "gfx942"}, target.getArch())) { - return IREE::GPU::GPUPadLayoutAttr::get( - target.getContext(), /*cacheLineBytes=*/128, /*cacheSets=*/4); + if (resolver == kPadEncodingLayoutResolverName) { + return IREE::GPU::GPUPadLayoutAttr::get(target.getContext(), + /*cache_line_bytes=*/std::nullopt, + /*cache_sets=*/std::nullopt); } return nullptr; } diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.h b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.h index a5d3ca9e05a4..ccb553172c8a 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.h +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.h @@ -16,6 +16,14 @@ constexpr char kNoEncodingLayoutResolverName[] = "none"; constexpr char kPadEncodingLayoutResolverName[] = "pad"; constexpr char kDataTilingEncodingLayoutResolverName[] = "data-tiling"; +struct L1CacheInfo { + uint32_t cacheLineBytes; + uint32_t cacheSets; +}; + +// Returns the L1 cache information for the `target`. +std::optional getL1CacheInfo(TargetAttr target); + // Returns a TargetAttr to target Metal via SPIR-V CodeGen. TargetAttr getMetalTargetDetails(MLIRContext *context); diff --git a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/BUILD.bazel b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/BUILD.bazel index 326c97a4c66b..04c1c8c84bfa 100644 --- a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/BUILD.bazel @@ -31,6 +31,7 @@ iree_compiler_cc_library( "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect", "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils", "//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect", + "//compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils:KnownTargets", "//compiler/src/iree/compiler/Codegen/Utils", "//compiler/src/iree/compiler/Dialect/Encoding/IR", "@llvm-project//llvm:Support", diff --git a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CMakeLists.txt index 181b029402e1..5d78dbc0f020 100644 --- a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/CMakeLists.txt @@ -33,6 +33,7 @@ iree_cc_library( iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect iree::compiler::Codegen::Dialect::Codegen::Utils iree::compiler::Codegen::Dialect::GPU::IR::IREEGPUDialect + iree::compiler::Codegen::Dialect::GPU::TargetUtils::KnownTargets iree::compiler::Codegen::Utils iree::compiler::Dialect::Encoding::IR PUBLIC diff --git a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/GPUEncodingExternalModels.cpp b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/GPUEncodingExternalModels.cpp index 27148ef83266..f778307ba83b 100644 --- a/compiler/src/iree/compiler/Codegen/ExternalInterfaces/GPUEncodingExternalModels.cpp +++ b/compiler/src/iree/compiler/Codegen/ExternalInterfaces/GPUEncodingExternalModels.cpp @@ -28,6 +28,7 @@ #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUOps.h" +#include "iree/compiler/Codegen/Dialect/GPU/TargetUtils/KnownTargets.h" #include "iree/compiler/Codegen/ExternalInterfaces/Utils.h" #include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "iree/compiler/Dialect/Encoding/IR/EncodingOps.h" @@ -409,10 +410,15 @@ struct GPUPadEncodingLayoutResolverAttrInterface final : Encoding::EncodingLayoutResolverAttrInterface::ExternalModel< GPUPadEncodingLayoutResolverAttrInterface, GPUPadLayoutAttr> { Attribute cloneWithSimplifiedConfig(Attribute attr, - DictionaryAttr /*config*/) const { - // This attribute is self-contained and does not need to look anything up - // from the target `config`. - return attr; + DictionaryAttr config) const { + MLIRContext *ctx = attr.getContext(); + IREE::GPU::TargetAttr gpuTarget = getGPUTargetAttr(config); + std::optional cache = + IREE::GPU::getL1CacheInfo(gpuTarget); + if (!cache) { + return IREE::Encoding::IdentityEncodingAttr::get(ctx); + } + return GPUPadLayoutAttr::get(ctx, cache->cacheLineBytes, cache->cacheSets); } Attribute getLayout(Attribute attr, RankedTensorType type) const { @@ -429,6 +435,10 @@ struct GPUPadEncodingLayoutResolverAttrInterface final return noPaddingAttr; } + if (!padLayoutAttr.getCacheLineBytes() || !padLayoutAttr.getCacheSets()) { + return noPaddingAttr; + } + const int64_t operandIndex = encodingAttr.getOperandIndex().getInt(); if (!llvm::is_contained({0, 1}, operandIndex)) { // We only have to pad matmul operands. @@ -481,7 +491,7 @@ struct GPUPadEncodingLayoutResolverAttrInterface final } const int64_t elementBits = type.getElementTypeBitWidth(); - const int64_t cacheLineBytes = padLayoutAttr.getCacheLineBytes(); + const int64_t cacheLineBytes = *padLayoutAttr.getCacheLineBytes(); if (elementBits % 8 != 0 || elementBits > cacheLineBytes) { // We do not support unaligned element types. return noPaddingAttr; @@ -492,7 +502,7 @@ struct GPUPadEncodingLayoutResolverAttrInterface final // cache line, but not a multiple of cache line * cache sets. This way the // next 'row' will start at a different cache set. const int64_t cacheSetSpanBytes = - padLayoutAttr.getCacheSets() * cacheLineBytes; + *padLayoutAttr.getCacheSets() * cacheLineBytes; const int64_t dimSizeInBytes = type.getDimSize(*padDimensionIndex) * (elementBits / 8); if (dimSizeInBytes < cacheSetSpanBytes) { diff --git a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_encodings.mlir b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_encodings.mlir index 40de4d4896f2..d502775ca1d9 100644 --- a/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_encodings.mlir +++ b/compiler/src/iree/compiler/Dialect/Stream/Transforms/test/specialize_encodings.mlir @@ -89,7 +89,19 @@ util.func public @gpu_with_encoding_layout(%d0: index, %d1: index) -> index { #map2 = affine_map<(m, n, k) -> (m, n)> #map3 = affine_map<(m, n, k) -> (n, k)> #executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", {abi = "hip", - iree.encoding.resolver = #iree_gpu.gpu_pad_layout, ukernels = "none"}> + iree.encoding.resolver = #iree_gpu.gpu_pad_layout<>, + iree.gpu.target = #iree_gpu.target], + subgroup_size_choices = [64], + max_workgroup_sizes = [1024, 1024, 1024], + max_thread_count_per_workgroup = 1024, + max_workgroup_memory_bytes = 65536, + max_workgroup_counts = [2147483647, 2147483647, 2147483647]>>}> #device_target_local_0_ = #hal.device.target<"local", {ordinal = 0 : index}, [#executable_target_rocm_hsaco_fb]> : !hal.device #encodingA = #iree_encoding.encoding #encodingB = #iree_encoding.encoding