[llvm] [AMDGPU] (x or y) xor -1 -> x nor y (PR #130264)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 01:23:26 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Ana Mihajlovic (mihajlovicana)
<details>
<summary>Changes</summary>
Added a pattern so that s_nor is selected for ((i1 x or i1 y) xor -1) instead of an s_or followed by an s_xor.
---
Full diff: https://github.com/llvm/llvm-project/pull/130264.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+14)
- (added) llvm/test/CodeGen/AMDGPU/isel-nor-32.ll (+68)
- (added) llvm/test/CodeGen/AMDGPU/isel-nor-64.ll (+68)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 5e62ceac281b8..eb994d9e5687a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1919,6 +1919,20 @@ def : ScalarNot2Pat<S_ORN2_B32, or, v2i16>;
def : ScalarNot2Pat<S_ORN2_B64, or, v4i16>;
def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>;
+let WaveSizePredicate = isWave32 in { // pattern below is only enabled under the isWave32 predicate
+def : GCNPat<
+ (i1 (not (or_oneuse i1:$src0, i1:$src1))), // source: i1 NOT applied to an or_oneuse of two i1 operands
+ (S_NOR_B32 i1:$src0, i1:$src1) // result: one S_NOR_B32 taking the same two operands
+>;
+} // end WaveSizePredicate = isWave32
+
+let WaveSizePredicate = isWave64 in { // pattern below is only enabled under the isWave64 predicate
+def : GCNPat<
+ (i1 (not (or_oneuse i1:$src0, i1:$src1))), // source: i1 NOT applied to an or_oneuse of two i1 operands
+ (S_NOR_B64 i1:$src0, i1:$src1) // result: one S_NOR_B64 taking the same two operands
+>;
+} // end WaveSizePredicate = isWave64
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/isel-nor-32.ll b/llvm/test/CodeGen/AMDGPU/isel-nor-32.ll
new file mode 100644
index 0000000000000..983a335568cc1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/isel-nor-32.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr="+wavefrontsize32,-wavefrontsize64" -o - < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr="+wavefrontsize32,-wavefrontsize64" -o - < %s | FileCheck %s
+
+define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: divergent_i1_phi_if_else:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: v_cmp_le_u32_e64 s0, v3, v4
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: s_mov_b32 s2, s0
+; CHECK-NEXT: s_and_saveexec_b32 s1, s0
+; CHECK-NEXT: ; %bb.1: ; %C
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
+; CHECK-NEXT: s_and_not1_b32 s2, s0, exec_lo
+; CHECK-NEXT: s_and_b32 s3, vcc_lo, exec_lo
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-NEXT: s_or_b32 s2, s2, s3
+; CHECK-NEXT: ; %bb.2: ; %MergeCF
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; CHECK-NEXT: s_nor_b32 s1, s0, s2
+; CHECK-NEXT: ; implicit-def: $sgpr0
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; CHECK-NEXT: s_and_saveexec_b32 s2, s1
+; CHECK-NEXT: s_xor_b32 s1, exec_lo, s2
+; CHECK-NEXT: ; %bb.3: ; %B
+; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 2, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2
+; CHECK-NEXT: ; %bb.4: ; %Flow
+; CHECK-NEXT: s_and_not1_saveexec_b32 s1, s1
+; CHECK-NEXT: ; %bb.5: ; %A
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; CHECK-NEXT: s_and_not1_b32 s0, s0, exec_lo
+; CHECK-NEXT: s_and_b32 s2, vcc_lo, exec_lo
+; CHECK-NEXT: s_or_b32 s0, s0, s2
+; CHECK-NEXT: ; %bb.6: ; %exit
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0
+; CHECK-NEXT: global_store_b32 v[0:1], v2, off
+; CHECK-NEXT: s_endpgm
+entry:
+ %x = icmp ule i32 %a, %b ; x = (a <= b), unsigned compare
+ br i1 %x, label %C, label %MergeCF ; block C is entered only when x is true
+
+C:
+ %y = icmp eq i32 %a, %c ; y = (a == c)
+ br label %MergeCF
+
+MergeCF:
+ %z = phi i1 [ %x, %entry ], [ %y, %C ] ; z = x when coming from entry, y when coming from C
+ %w = icmp ule i32 %a, %b ; same comparison as %x in entry
+ %cmp = or i1 %w, %z ; i1 OR feeding the branch; expected assembly for %MergeCF contains s_nor_b32 s1, s0, s2
+ br i1 %cmp, label %A, label %B
+
+A:
+ %val_A = icmp uge i32 %tid, 1 ; tid >= 1, unsigned compare
+ br label %exit
+
+B:
+ %val_B = icmp ult i32 %tid, 2 ; tid < 2, unsigned compare
+ br label %exit
+
+exit:
+ %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ] ; picks the compare result of whichever of A/B ran
+ %sel = select i1 %phi, i32 1, i32 2 ; 1 when %phi is true, otherwise 2
+ store i32 %sel, ptr addrspace(1) %out ; the function's only store, to the %out pointer
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/isel-nor-64.ll b/llvm/test/CodeGen/AMDGPU/isel-nor-64.ll
new file mode 100644
index 0000000000000..88e0a05556b78
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/isel-nor-64.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr="-wavefrontsize32,+wavefrontsize64" -o - < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr="-wavefrontsize32,+wavefrontsize64" -o - < %s | FileCheck %s
+
+define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: divergent_i1_phi_if_else:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], v3, v4
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: s_mov_b64 s[4:5], s[0:1]
+; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
+; CHECK-NEXT: ; %bb.1: ; %C
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
+; CHECK-NEXT: s_and_not1_b64 s[4:5], s[0:1], exec
+; CHECK-NEXT: s_and_b64 s[6:7], vcc, exec
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
+; CHECK-NEXT: ; %bb.2: ; %MergeCF
+; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
+; CHECK-NEXT: s_nor_b64 s[2:3], s[0:1], s[4:5]
+; CHECK-NEXT: ; implicit-def: $sgpr0_sgpr1
+; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
+; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; CHECK-NEXT: ; %bb.3: ; %B
+; CHECK-NEXT: v_cmp_gt_u32_e64 s[0:1], 2, v2
+; CHECK-NEXT: ; implicit-def: $vgpr2
+; CHECK-NEXT: ; %bb.4: ; %Flow
+; CHECK-NEXT: s_and_not1_saveexec_b64 s[2:3], s[2:3]
+; CHECK-NEXT: ; %bb.5: ; %A
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; CHECK-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
+; CHECK-NEXT: s_and_b64 s[4:5], vcc, exec
+; CHECK-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
+; CHECK-NEXT: ; %bb.6: ; %exit
+; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 2, 1, s[0:1]
+; CHECK-NEXT: global_store_b32 v[0:1], v2, off
+; CHECK-NEXT: s_endpgm
+entry:
+ %x = icmp ule i32 %a, %b ; x = (a <= b), unsigned compare
+ br i1 %x, label %C, label %MergeCF ; block C is entered only when x is true
+
+C:
+ %y = icmp eq i32 %a, %c ; y = (a == c)
+ br label %MergeCF
+
+MergeCF:
+ %z = phi i1 [ %x, %entry ], [ %y, %C ] ; z = x when coming from entry, y when coming from C
+ %w = icmp ule i32 %a, %b ; same comparison as %x in entry
+ %cmp = or i1 %w, %z ; i1 OR feeding the branch; expected assembly for %MergeCF contains s_nor_b64 s[2:3], s[0:1], s[4:5]
+ br i1 %cmp, label %A, label %B
+
+A:
+ %val_A = icmp uge i32 %tid, 1 ; tid >= 1, unsigned compare
+ br label %exit
+
+B:
+ %val_B = icmp ult i32 %tid, 2 ; tid < 2, unsigned compare
+ br label %exit
+
+exit:
+ %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ] ; picks the compare result of whichever of A/B ran
+ %sel = select i1 %phi, i32 1, i32 2 ; 1 when %phi is true, otherwise 2
+ store i32 %sel, ptr addrspace(1) %out ; the function's only store, to the %out pointer
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/130264
More information about the llvm-commits mailing list