[clang-tools-extra] Improve selection of conditional branch on amdgcn.ballot!=0 condition in SelectionDAG. (PR #68714)

Fri Oct 27 06:07:52 PDT 2023

================
@@ -396,3 +396,115 @@ true:
 false:
   ret i32 33
 }
+
+declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
+
+define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
+; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 12, v0
+; CHECK-NEXT:    v_cmp_lt_u32_e64 s0, 34, v1
+; CHECK-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
+; CHECK-NEXT:    s_cbranch_vccnz .LBB20_2
+; CHECK-NEXT:  ; %bb.1: ; %true
+; CHECK-NEXT:    s_mov_b32 s0, 42
+; CHECK-NEXT:    s_branch .LBB20_3
+; CHECK-NEXT:  .LBB20_2: ; %false
+; CHECK-NEXT:    s_mov_b32 s0, 33
+; CHECK-NEXT:    s_branch .LBB20_3
+; CHECK-NEXT:  .LBB20_3:
+  %v1c = icmp ult i32 %v1, 12
+  %v2c = icmp ugt i32 %v2, 34
+  %c = and i1 %v1c, %v2c
+  %ballot = call i32 @llvm.amdgcn.icmp.i32(i1 %c, i1 0, i32 32) ; ICMP_EQ == 32
+  %ballot_ne_zero = icmp ne i32 %ballot, 0
+  br i1 %ballot_ne_zero, label %true, label %false
+true:
+  ret i32 42
+false:
+  ret i32 33
+}
+
+define amdgpu_cs i32 @branch_uniform_simulated_negated_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
+; CHECK-LABEL: branch_uniform_simulated_negated_ballot_ne_zero_and:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_cmp_lt_u32 s0, 12
+; CHECK-NEXT:    s_cselect_b32 s0, -1, 0
+; CHECK-NEXT:    s_cmp_gt_u32 s1, 34
+; CHECK-NEXT:    s_cselect_b32 s1, -1, 0
+; CHECK-NEXT:    s_and_b32 s0, s0, s1
----------------
nhaehnle wrote:

This sequence could be improved to:
```
  s_cmp_lt_u32 s0, 12
  s_cselect_b32 s0, -1, 0
  s_cmp_gt_u32 s1, 34
  s_cselect_b32 s0, s0, 0
```
By selecting into vcc(_lo) instead, we could even avoid the AND-with-exec that follows.

Not something for this patch, just an observation. Perhaps you could add a TODO.

(In general, LLVM knows to do boolean logic -> select folds. The fact that they don't fire here is probably a pass ordering issue.)

https://github.com/llvm/llvm-project/pull/68714