[llvm] [InstSimplify] Fold xor using implied conditions (PR #75609)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 05:33:26 PST 2023


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/75609

This patch folds `xor X, Y` into `true` if we can prove that `X` is never equal to `Y`, i.e., the implied conditions rule out both `X == Y == false` and `X == Y == true`.
The fold could be extended to handle other cases (e.g., `InvalidTable[0][1] && InvalidTable[1][0] --> false`), but I don't see any benefit in doing so.

Fixes #70928.


>From f505a2f8f2d8cf6d17713c3a4703a1cd35f32d66 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 15 Dec 2023 21:21:09 +0800
Subject: [PATCH 1/2] [InstSimplify] Add pre-commit tests from PR70928. NFC.

---
 llvm/test/Transforms/InstCombine/xor-icmps.ll | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index c85993ea9a7e0d..2476bb06120950 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -171,3 +171,89 @@ define i1 @xor_icmp_ptr(ptr %c, ptr %d) {
   ret i1 %xor
 }
 
+; Tests from PR70928
+define i1 @xor_icmp_true1(i32 %x, i32 %y) {
+; CHECK-LABEL: @xor_icmp_true1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %add = add nsw i32 %y, 1
+  %cmp1 = icmp sgt i32 %x, %y
+  %cmp2 = icmp slt i32 %x, %add
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @xor_icmp_true2(i32 %x, i32 %y) {
+; CHECK-LABEL: @xor_icmp_true2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %sub = add nsw i32 %y, -1
+  %cmp1 = icmp slt i32 %x, %y
+  %cmp2 = icmp sgt i32 %x, %sub
+  %xor = xor i1 %cmp1, %cmp2
+  ret i1 %xor
+}
+
+define i1 @xor_icmp_true3(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A]], 6
+; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    ret i1 [[CMP3]]
+;
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  %cmp1 = icmp slt i32 %a, 6
+  %cmp3 = xor i1 %cmp, %cmp1
+  ret i1 %cmp3
+}
+
+define i1 @xor_icmp_true4(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    ret i1 [[CMP3]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 5
+  %cmp1 = icmp sgt i32 %a, 4
+  %cmp3 = xor i1 %cmp, %cmp1
+  ret i1 %cmp3
+}
+
+define i1 @xor_icmp_true4_commuted(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true4_commuted(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP1]], [[CMP]]
+; CHECK-NEXT:    ret i1 [[CMP3]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 5
+  %cmp1 = icmp sgt i32 %a, 4
+  %cmp3 = xor i1 %cmp1, %cmp
+  ret i1 %cmp3
+}
+
+define i1 @xor_icmp_failed_to_imply(i32 %a) {
+; CHECK-LABEL: @xor_icmp_failed_to_imply(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 7
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    ret i1 [[CMP3]]
+;
+entry:
+  %cmp = icmp slt i32 %a, 7
+  %cmp1 = icmp sgt i32 %a, 4
+  %cmp3 = xor i1 %cmp, %cmp1
+  ret i1 %cmp3
+}

>From 1d0d7fa017a337da4c24bb697fd9a97bcf9001f7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 15 Dec 2023 21:27:29 +0800
Subject: [PATCH 2/2] [InstSimplify] Fold xor using implied conditions

---
 llvm/lib/Analysis/InstructionSimplify.cpp     | 16 +++++++++
 llvm/test/CodeGen/AMDGPU/wave32.ll            | 35 +++++--------------
 llvm/test/Transforms/InstCombine/xor-icmps.ll | 15 ++------
 3 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 2a45acf63aa2ca..26ae9b472ff3d6 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2563,6 +2563,22 @@ static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
   if (Value *V = simplifyByDomEq(Instruction::Xor, Op0, Op1, Q, MaxRecurse))
     return V;
 
+  if (Op0->getType()->isIntOrIntVectorTy(1)) {
+    bool InvalidTable[2][2] = {};
+    if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, false))
+      InvalidTable[0][!*Implied] = true;
+    if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, true))
+      InvalidTable[1][!*Implied] = true;
+    if (std::optional<bool> Implied = isImpliedCondition(Op1, Op0, Q.DL, false))
+      InvalidTable[!*Implied][0] = true;
+    if (std::optional<bool> Implied = isImpliedCondition(Op1, Op0, Q.DL, true))
+      InvalidTable[!*Implied][1] = true;
+
+    if (InvalidTable[0][0] && InvalidTable[1][1])
+      return ConstantInt::getTrue(Op0->getType());
+    // NOTE: There would be no benefit to handle other cases.
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index c3ac778f82e049..2b723662fb1cc2 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -237,33 +237,14 @@ define amdgpu_kernel void @test_vop3_cmp_f32_sop_and(ptr addrspace(1) %arg) {
 }
 
 define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(ptr addrspace(1) %arg) {
-; GFX1032-LABEL: test_vop3_cmp_i32_sop_xor:
-; GFX1032:       ; %bb.0:
-; GFX1032-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX1032-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX1032-NEXT:    s_waitcnt vmcnt(0)
-; GFX1032-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v1
-; GFX1032-NEXT:    v_cmp_gt_i32_e64 s0, 1, v1
-; GFX1032-NEXT:    s_xor_b32 s0, vcc_lo, s0
-; GFX1032-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s0
-; GFX1032-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX1032-NEXT:    s_endpgm
-;
-; GFX1064-LABEL: test_vop3_cmp_i32_sop_xor:
-; GFX1064:       ; %bb.0:
-; GFX1064-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX1064-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX1064-NEXT:    s_waitcnt vmcnt(0)
-; GFX1064-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v1
-; GFX1064-NEXT:    v_cmp_gt_i32_e64 s[0:1], 1, v1
-; GFX1064-NEXT:    s_xor_b64 s[0:1], vcc, s[0:1]
-; GFX1064-NEXT:    v_cndmask_b32_e64 v1, 2, 1, s[0:1]
-; GFX1064-NEXT:    global_store_dword v0, v1, s[2:3]
-; GFX1064-NEXT:    s_endpgm
+; GCN-LABEL: test_vop3_cmp_i32_sop_xor:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid
   %load = load i32, ptr addrspace(1) %gep, align 4
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index 2476bb06120950..8ef5465d32e845 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -201,10 +201,7 @@ entry:
 define i1 @xor_icmp_true3(i32 %a) {
 ; CHECK-LABEL: @xor_icmp_true3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A]], 6
-; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT:    ret i1 [[CMP3]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %cmp = icmp sgt i32 %a, 5
@@ -216,10 +213,7 @@ entry:
 define i1 @xor_icmp_true4(i32 %a) {
 ; CHECK-LABEL: @xor_icmp_true4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
-; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT:    ret i1 [[CMP3]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %cmp = icmp slt i32 %a, 5
@@ -231,10 +225,7 @@ entry:
 define i1 @xor_icmp_true4_commuted(i32 %a) {
 ; CHECK-LABEL: @xor_icmp_true4_commuted(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
-; CHECK-NEXT:    [[CMP3:%.*]] = xor i1 [[CMP1]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[CMP3]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %cmp = icmp slt i32 %a, 5



More information about the llvm-commits mailing list