[llvm] [AMDGPU] Introduce "amdgpu-uniform-intrinsic-combine" pass to combine uniform AMDGPU lane Intrinsics. (PR #116953)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 00:33:55 PST 2025
================
@@ -0,0 +1,535 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,early-cse,instcombine,simplifycfg -S < %s | FileCheck %s -check-prefix=DCE-CHECK
+
+; Trivial waterfall loop: %done is a phi of the constants false/true, and the
+; loop exits when ballot(%not_done) == 0 (ballot on the LHS of the icmp).
+; PASS-CHECK expects the branch to be rewritten to use `xor %not_done, true`
+; while the ballot call and the icmp are left in place. DCE-CHECK expects the
+; follow-up passes to reduce the function to a single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]]
+; CURRENT-CHECK: [[IF_PEEL]]:
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[EXIT]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
+; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
+; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %not_done = xor i1 %done, true
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
+ %is_done = icmp eq i64 %ballot, 0 ; in this case is_done = !not_done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Same loop as the `ballot == 0` exit test, but with the icmp operands swapped:
+; the constant 0 is on the LHS (`icmp eq i64 0, %ballot`).
+; PASS-CHECK expects the same rewrite of the branch condition to
+; `xor %not_done, true`, with the original icmp kept in its swapped form.
+; DCE-CHECK expects the function to reduce to a single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]]
+; CURRENT-CHECK: [[IF_PEEL]]:
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[EXIT]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
+; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 0, [[BALLOT]]
+; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %not_done = xor i1 %done, true
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
+ %is_done = icmp eq i64 0, %ballot ; in this case is_done = !not_done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Trivial waterfall loop whose condition is ballot(%done) != 1 (ballot on the
+; LHS). Note the branch targets: a true condition continues to %if, a false
+; one leaves through %exit.
+; PASS-CHECK expects the branch condition to become `xor %done, true`, with
+; the ballot call and the icmp left in place. DCE-CHECK expects the function
+; to reduce to a single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_ne_one(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_one(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 1
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[EXIT:.*]], label %[[WHILE]], !llvm.loop [[LOOP0:![0-9]+]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_one(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
+; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[DONE]], true
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 [[BALLOT]], 1
+; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[IF]], label %[[EXIT:.*]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_one(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
+ %is_done = icmp ne i64 %ballot, 1 ; in this case is_done = !done
+ br i1 %is_done, label %if, label %exit
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Same loop as the `ballot != 1` test, but with the icmp operands swapped: the
+; constant 1 is on the LHS (`icmp ne i64 1, %ballot`).
+; PASS-CHECK expects the branch condition to become `xor %done, true`, with
+; the original icmp kept in its swapped form. DCE-CHECK expects the function
+; to reduce to a single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_ne_one_swap_op(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_one_swap_op(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 1
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[EXIT:.*]], label %[[WHILE]], !llvm.loop [[LOOP2:![0-9]+]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_one_swap_op(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
+; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[DONE]], true
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 1, [[BALLOT]]
+; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[IF]], label %[[EXIT:.*]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_one_swap_op(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
+ %is_done = icmp ne i64 1, %ballot ; in this case is_done = !done
+ br i1 %is_done, label %if, label %exit
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Trivial waterfall loop that exits when ballot(%done) == 1 (ballot on the
+; LHS of the icmp).
+; PASS-CHECK expects the branch to use %done directly, with no extra xor,
+; while the ballot call and the icmp are left in place. DCE-CHECK expects the
+; function to reduce to a single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_eq_one(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_one(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[TMP0]], 1
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[WHILE]], !llvm.loop [[LOOP3:![0-9]+]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_one(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 1
+; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_one(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
+ %is_done = icmp eq i64 %ballot, 1 ; in this case is_done = done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Same loop as the `ballot == 1` test, but with the icmp operands swapped: the
+; constant 1 is on the LHS (`icmp eq i64 1, %ballot`).
+; PASS-CHECK expects the branch to use %done directly, with the original icmp
+; kept in its swapped form. DCE-CHECK expects the function to reduce to a
+; single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_eq_one_swap_op(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_one_swap_op(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[TMP0]], 1
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[WHILE]], !llvm.loop [[LOOP4:![0-9]+]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_one_swap_op(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 1, [[BALLOT]]
+; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_one_swap_op(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
+ %is_done = icmp eq i64 1, %ballot ; in this case is_done = done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Trivial waterfall loop that exits when ballot(%done) != 0. This is the base
+; form of the pair: the ballot is on the LHS of the icmp and the constant on
+; the RHS, matching the other non-swapped tests in this file.
+; PASS-CHECK expects the branch to use %done directly, while the ballot call
+; and the icmp are left in place. DCE-CHECK expects the function to reduce to
+; a single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_ne_zero(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP5:![0-9]+]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 [[BALLOT]], 0
+; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
+ %is_done = icmp ne i64 %ballot, 0 ; in this case is_done = done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Swapped-operand variant of the `ballot != 0` test: the constant 0 is on the
+; LHS (`icmp ne i64 0, %ballot`).
+; PASS-CHECK expects the branch to use %done directly, with the original icmp
+; kept in its swapped form. DCE-CHECK expects the function to reduce to a
+; single store and ret.
+define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
+; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP6:![0-9]+]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 0, [[BALLOT]]
+; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*:]]
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ br label %while
+
+while:
+ %done = phi i1 [ 0, %entry ], [ 1, %if ]
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
+ %is_done = icmp ne i64 0, %ballot ; in this case is_done = done
+ br i1 %is_done, label %exit, label %if
+
+if:
+ store i32 5, ptr addrspace(1) %out
+ br label %while
+
+exit:
+ ret void
+}
+
+; Non-trivial waterfall loop: %done is fed by %new_done, which is true only on
+; the path through %work, i.e. when %tid (built from mbcnt.lo/mbcnt.hi) equals
+; its readfirstlane value.
+; PASS-CHECK expects the pass to leave the loop as written: the branch still
+; uses the icmp on the ballot result and readfirstlane is still called.
+; NOTE(review): presumably this is because %done is not uniform here, so the
+; ballot must not be folded; confirm against the pass implementation.
+; CURRENT-CHECK and DCE-CHECK only show other simplifications: the ballot is
+; narrowed to i32, the mbcnt.hi call is gone, and the %new_done phi is replaced
+; by the %is_first_active_id compare.
+define protected amdgpu_kernel void @waterfall(ptr addrspace(1) %out) {
+; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @waterfall(
+; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CURRENT-CHECK-NEXT: [[ENTRY:.*]]:
+; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
+; CURRENT-CHECK: [[WHILE]]:
+; CURRENT-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[IS_FIRST_ACTIVE_ID:%.*]], %[[WHILE_BACKEDGE:.*]] ]
+; CURRENT-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
+; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[NOT_DONE]])
+; CURRENT-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[TMP1]], 0
+; CURRENT-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[IF:.*]]
+; CURRENT-CHECK: [[IF]]:
+; CURRENT-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP0]])
+; CURRENT-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID]] = icmp eq i32 [[TMP0]], [[FIRST_ACTIVE_ID]]
+; CURRENT-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[WHILE_BACKEDGE]]
+; CURRENT-CHECK: [[WORK]]:
+; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; CURRENT-CHECK-NEXT: br label %[[WHILE_BACKEDGE]]
+; CURRENT-CHECK: [[WHILE_BACKEDGE]]:
+; CURRENT-CHECK-NEXT: br label %[[WHILE]]
+; CURRENT-CHECK: [[EXIT]]:
+; CURRENT-CHECK-NEXT: ret void
+;
+; PASS-CHECK-LABEL: define protected amdgpu_kernel void @waterfall(
+; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; PASS-CHECK-NEXT: [[ENTRY:.*]]:
+; PASS-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; PASS-CHECK-NEXT: [[TID:%.*]] = tail call noundef i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP0]])
+; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
+; PASS-CHECK: [[WHILE]]:
+; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ]
+; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
+; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
+; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
+; PASS-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[IF:.*]]
+; PASS-CHECK: [[IF]]:
+; PASS-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID]])
+; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[TID]], [[FIRST_ACTIVE_ID]]
+; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
+; PASS-CHECK: [[WORK]]:
+; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; PASS-CHECK-NEXT: br label %[[TAIL]]
+; PASS-CHECK: [[TAIL]]:
+; PASS-CHECK-NEXT: [[NEW_DONE]] = phi i1 [ true, %[[WORK]] ], [ false, %[[IF]] ]
+; PASS-CHECK-NEXT: br label %[[WHILE]]
+; PASS-CHECK: [[EXIT]]:
+; PASS-CHECK-NEXT: ret void
+;
+; DCE-CHECK-LABEL: define protected amdgpu_kernel void @waterfall(
+; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; DCE-CHECK-NEXT: [[ENTRY:.*]]:
+; DCE-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; DCE-CHECK-NEXT: br label %[[WHILE:.*]]
+; DCE-CHECK: [[WHILE]]:
+; DCE-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[IS_FIRST_ACTIVE_ID:%.*]], %[[TAIL:.*]] ]
+; DCE-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
+; DCE-CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[NOT_DONE]])
+; DCE-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[TMP1]], 0
+; DCE-CHECK-NEXT: br i1 [[IS_DONE]], label %[[EXIT:.*]], label %[[IF:.*]]
+; DCE-CHECK: [[IF]]:
+; DCE-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP0]])
+; DCE-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID]] = icmp eq i32 [[TMP0]], [[FIRST_ACTIVE_ID]]
+; DCE-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
+; DCE-CHECK: [[WORK]]:
+; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
+; DCE-CHECK-NEXT: br label %[[TAIL]]
+; DCE-CHECK: [[TAIL]]:
+; DCE-CHECK-NEXT: br label %[[WHILE]]
+; DCE-CHECK: [[EXIT]]:
+; DCE-CHECK-NEXT: ret void
+;
+entry:
+ %1 = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+ %tid = tail call noundef i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %1)
+ br label %while
+
+while:
+ %done = phi i1 [ false, %entry ], [ %new_done, %tail ]
+ %not_done = xor i1 %done, true
+ %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
+ %is_done = icmp eq i64 %ballot, 0
+ br i1 %is_done, label %exit, label %if
+
+if:
+ %first_active_id = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 %tid)
+ %is_first_active_id = icmp eq i32 %tid, %first_active_id
+ br i1 %is_first_active_id, label %work, label %tail
+
+work:
+ store i32 5, ptr addrspace(1) %out
+ br label %tail
+
+tail:
+ %new_done = phi i1 [ true, %work ], [ false, %if ]
+ br label %while
+
+exit:
+ ret void
+}
+
----------------
PankajDwivedi-25 wrote:
Do you have an equivalent HIP test? What is `mymask` here?
https://github.com/llvm/llvm-project/pull/116953
More information about the llvm-commits
mailing list