[llvm] [InstSimplify] Fold xor using implied conditions (PR #75609)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 15 05:33:26 PST 2023
https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/75609
This patch folds `xor X, Y` into `true` for `i1` (or vector of `i1`) operands when implied-condition analysis proves that `X` can never be equal to `Y`.
This could be extended to handle other cases (e.g., `InvalidTable[0][1] && InvalidTable[1][0] --> false`, i.e. `X` and `Y` are always equal), but I don't see the benefit of that.
Fixes #70928.
>From f505a2f8f2d8cf6d17713c3a4703a1cd35f32d66 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 15 Dec 2023 21:21:09 +0800
Subject: [PATCH 1/2] [InstSimplify] Add pre-commit tests from PR70928. NFC.
---
llvm/test/Transforms/InstCombine/xor-icmps.ll | 86 +++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index c85993ea9a7e0d..2476bb06120950 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -171,3 +171,89 @@ define i1 @xor_icmp_ptr(ptr %c, ptr %d) {
ret i1 %xor
}
+; Tests from PR70928
+define i1 @xor_icmp_true1(i32 %x, i32 %y) {
+; CHECK-LABEL: @xor_icmp_true1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %add = add nsw i32 %y, 1
+ %cmp1 = icmp sgt i32 %x, %y
+ %cmp2 = icmp slt i32 %x, %add
+ %xor = xor i1 %cmp1, %cmp2
+ ret i1 %xor
+}
+
+define i1 @xor_icmp_true2(i32 %x, i32 %y) {
+; CHECK-LABEL: @xor_icmp_true2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ %sub = add nsw i32 %y, -1
+ %cmp1 = icmp slt i32 %x, %y
+ %cmp2 = icmp sgt i32 %x, %sub
+ %xor = xor i1 %cmp1, %cmp2
+ ret i1 %xor
+}
+
+define i1 @xor_icmp_true3(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A]], 6
+; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+entry:
+ %cmp = icmp sgt i32 %a, 5
+ %cmp1 = icmp slt i32 %a, 6
+ %cmp3 = xor i1 %cmp, %cmp1
+ ret i1 %cmp3
+}
+
+define i1 @xor_icmp_true4(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+entry:
+ %cmp = icmp slt i32 %a, 5
+ %cmp1 = icmp sgt i32 %a, 4
+ %cmp3 = xor i1 %cmp, %cmp1
+ ret i1 %cmp3
+}
+
+define i1 @xor_icmp_true4_commuted(i32 %a) {
+; CHECK-LABEL: @xor_icmp_true4_commuted(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP1]], [[CMP]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+entry:
+ %cmp = icmp slt i32 %a, 5
+ %cmp1 = icmp sgt i32 %a, 4
+ %cmp3 = xor i1 %cmp1, %cmp
+ ret i1 %cmp3
+}
+
+define i1 @xor_icmp_failed_to_imply(i32 %a) {
+; CHECK-LABEL: @xor_icmp_failed_to_imply(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 7
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
+; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+entry:
+ %cmp = icmp slt i32 %a, 7
+ %cmp1 = icmp sgt i32 %a, 4
+ %cmp3 = xor i1 %cmp, %cmp1
+ ret i1 %cmp3
+}
>From 1d0d7fa017a337da4c24bb697fd9a97bcf9001f7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 15 Dec 2023 21:27:29 +0800
Subject: [PATCH 2/2] [InstSimplify] Fold xor using implied conditions
---
llvm/lib/Analysis/InstructionSimplify.cpp | 16 +++++++++
llvm/test/CodeGen/AMDGPU/wave32.ll | 35 +++++--------------
llvm/test/Transforms/InstCombine/xor-icmps.ll | 15 ++------
3 files changed, 27 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 2a45acf63aa2ca..26ae9b472ff3d6 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2563,6 +2563,22 @@ static Value *simplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Value *V = simplifyByDomEq(Instruction::Xor, Op0, Op1, Q, MaxRecurse))
return V;
+ if (Op0->getType()->isIntOrIntVectorTy(1)) {
+ bool InvalidTable[2][2] = {};
+ if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, false))
+ InvalidTable[0][!*Implied] = true;
+ if (std::optional<bool> Implied = isImpliedCondition(Op0, Op1, Q.DL, true))
+ InvalidTable[1][!*Implied] = true;
+ if (std::optional<bool> Implied = isImpliedCondition(Op1, Op0, Q.DL, false))
+ InvalidTable[!*Implied][0] = true;
+ if (std::optional<bool> Implied = isImpliedCondition(Op1, Op0, Q.DL, true))
+ InvalidTable[!*Implied][1] = true;
+
+ if (InvalidTable[0][0] && InvalidTable[1][1])
+ return ConstantInt::getTrue(Op0->getType());
+ // NOTE: There would be no benefit to handle other cases.
+ }
+
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index c3ac778f82e049..2b723662fb1cc2 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -237,33 +237,14 @@ define amdgpu_kernel void @test_vop3_cmp_f32_sop_and(ptr addrspace(1) %arg) {
}
define amdgpu_kernel void @test_vop3_cmp_i32_sop_xor(ptr addrspace(1) %arg) {
-; GFX1032-LABEL: test_vop3_cmp_i32_sop_xor:
-; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX1032-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v1
-; GFX1032-NEXT: v_cmp_gt_i32_e64 s0, 1, v1
-; GFX1032-NEXT: s_xor_b32 s0, vcc_lo, s0
-; GFX1032-NEXT: v_cndmask_b32_e64 v1, 2, 1, s0
-; GFX1032-NEXT: global_store_dword v0, v1, s[2:3]
-; GFX1032-NEXT: s_endpgm
-;
-; GFX1064-LABEL: test_vop3_cmp_i32_sop_xor:
-; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX1064-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
-; GFX1064-NEXT: v_cmp_gt_i32_e64 s[0:1], 1, v1
-; GFX1064-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
-; GFX1064-NEXT: v_cndmask_b32_e64 v1, 2, 1, s[0:1]
-; GFX1064-NEXT: global_store_dword v0, v1, s[2:3]
-; GFX1064-NEXT: s_endpgm
+; GCN-LABEL: test_vop3_cmp_i32_sop_xor:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 1
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %lid
%load = load i32, ptr addrspace(1) %gep, align 4
diff --git a/llvm/test/Transforms/InstCombine/xor-icmps.ll b/llvm/test/Transforms/InstCombine/xor-icmps.ll
index 2476bb06120950..8ef5465d32e845 100644
--- a/llvm/test/Transforms/InstCombine/xor-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/xor-icmps.ll
@@ -201,10 +201,7 @@ entry:
define i1 @xor_icmp_true3(i32 %a) {
; CHECK-LABEL: @xor_icmp_true3(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A]], 6
-; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT: ret i1 [[CMP3]]
+; CHECK-NEXT: ret i1 true
;
entry:
%cmp = icmp sgt i32 %a, 5
@@ -216,10 +213,7 @@ entry:
define i1 @xor_icmp_true4(i32 %a) {
; CHECK-LABEL: @xor_icmp_true4(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
-; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
-; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP]], [[CMP1]]
-; CHECK-NEXT: ret i1 [[CMP3]]
+; CHECK-NEXT: ret i1 true
;
entry:
%cmp = icmp slt i32 %a, 5
@@ -231,10 +225,7 @@ entry:
define i1 @xor_icmp_true4_commuted(i32 %a) {
; CHECK-LABEL: @xor_icmp_true4_commuted(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A:%.*]], 5
-; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A]], 4
-; CHECK-NEXT: [[CMP3:%.*]] = xor i1 [[CMP1]], [[CMP]]
-; CHECK-NEXT: ret i1 [[CMP3]]
+; CHECK-NEXT: ret i1 true
;
entry:
%cmp = icmp slt i32 %a, 5
More information about the llvm-commits mailing list