[mlir] added gpu.shuffle mode UP support (#137300) · llvm/llvm-project@60a1f5a · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Commit 60a1f5a

Browse files
authored
[mlir] added gpu.shuffle mode UP support (#137300)
Added support for `gpu.shuffle` mode `UP`.

Signed-off-by: xintin <gaurav.verma@amd.com>
1 parent 683c3b8 commit 60a1f5a

File tree

2 files changed

+22
-4
lines changed

2 files changed

+22
-4
lines changed

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,13 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
149149
Value widthOrZeroIfOutside =
150150
rewriter.create<LLVM::AndOp>(loc, int32Type, add, negwidth);
151151
Value dstLane;
152-
// TODO: Add support for gpu::ShuffleMode::UP and gpu::ShuffleMode::DOWN.
153152
// TODO: Use ds_swizzle for XOR when step/offsets are constants for better
154153
// perf.
155154
switch (op.getMode()) {
155+
case gpu::ShuffleMode::UP:
156+
dstLane = rewriter.create<LLVM::SubOp>(loc, int32Type, srcLaneId,
157+
adaptor.getOffset());
158+
break;
156159
case gpu::ShuffleMode::DOWN:
157160
dstLane = rewriter.create<LLVM::AddOp>(loc, int32Type, srcLaneId,
158161
adaptor.getOffset());

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ gpu.module @test_module {
660660

661661
gpu.module @test_module {
662662
// CHECK-LABEL: func @gpu_shuffle()
663-
func.func @gpu_shuffle() -> (f32, f32, f32) {
663+
func.func @gpu_shuffle() -> (f32, f32, f32, f32) {
664664
// CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
665665
%arg0 = arith.constant 1.0 : f32
666666
// CHECK: %[[#OFFSET:]] = llvm.mlir.constant(4 : i32) : i32
@@ -693,7 +693,22 @@ gpu.module @test_module {
693693
// CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
694694
// CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
695695
// CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
696-
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
696+
%shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32
697+
// *** UP mode shuffle ***
698+
// CHECK: %[[#LANE_ID:]] = rocdl.mbcnt.hi
699+
// CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32
700+
// CHECK: %[[#NEG_WIDTH:]] = llvm.sub %[[#ZERO]], %[[#WIDTH]] : i32
701+
// CHECK: %[[#ADD:]] = llvm.add %[[#LANE_ID]], %[[#WIDTH]] : i32
702+
// CHECK: %[[#WARP_OR_ZERO:]] = llvm.and %[[#ADD]], %[[#NEG_WIDTH]] : i32
703+
// CHECK: %[[#UP:]] = llvm.sub %[[#LANE_ID]], %{{.*}} : i32
704+
// CHECK: %[[#CMP:]] = llvm.icmp "slt" %[[#UP]], %[[#WARP_OR_ZERO]] : i32
705+
// CHECK: %[[#DST_LANE:]] = llvm.select %[[#CMP]], %[[#UP]], %{{.*}} : i1, i32
706+
// CHECK: %[[#TWO:]] = llvm.mlir.constant(2 : i32) : i32
707+
// CHECK: %[[#ALIGNED_DST_LANE:]] = llvm.shl %[[#DST_LANE]], %[[#TWO]] : i32
708+
// CHECK: %[[#CAST_VALUE:]] = llvm.bitcast %[[#VALUE]] : f32 to i32
709+
// CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
710+
// CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
711+
%shflu, %predu = gpu.shuffle up %arg0, %arg1, %arg2 : f32
697712
// CHECK: %[[#LANE_ID:]] = rocdl.mbcnt.hi
698713
// CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32
699714
// CHECK: %[[#NEG_WIDTH:]] = llvm.sub %[[#ZERO]], %[[#WIDTH]] : i32
@@ -708,7 +723,7 @@ gpu.module @test_module {
708723
// CHECK: %[[#PERMUTE:]] = rocdl.ds_bpermute %[[#ALIGNED_DST_LANE]], %[[#CAST_VALUE]] : (i32, i32) -> i32
709724
// CHECK: %[[#CAST_SHFL_VALUE:]] = llvm.bitcast %[[#PERMUTE]] : i32 to f32
710725
%shfld, %predd = gpu.shuffle down %arg0, %arg1, %arg2 : f32
711-
func.return %shfl, %shfli, %shfld : f32, f32, f32
726+
func.return %shfl, %shfli, %shflu, %shfld : f32, f32, f32, f32
712727
}
713728

714729
// CHECK-LABEL: func @gpu_shuffle_vec

0 commit comments

Comments
 (0)
0