[llvm] [InstCombine] Fix failure to fold (A >> C1) Pred C2 if shr is used multiple times #83430 (PR #83563)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 13 00:32:49 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (SahilPatidar)
<details>
<summary>Changes</summary>
Resolves #83430
---
Full diff: https://github.com/llvm/llvm-project/pull/83563.diff
7 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+4-4)
- (modified) llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll (+2-1)
- (modified) llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll (+2-2)
- (modified) llvm/test/Transforms/InstCombine/icmp-shr.ll (+1-1)
- (modified) llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll (+24-22)
- (modified) llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll (+7-3)
- (modified) llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 49e597171b1c6f..4eac73e8055589 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2469,7 +2469,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
// constant-value-based preconditions in the folds below, then we could assert
// those conditions rather than checking them. This is difficult because of
// undef/poison (PR34838).
- if (IsAShr && Shr->hasOneUse()) {
+ if (IsAShr) {
if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
// When ShAmtC can be shifted losslessly:
// icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
@@ -7025,9 +7025,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = canonicalizeICmpPredicate(I))
return Res;
- if (Instruction *Res = foldICmpWithConstant(I))
- return Res;
-
if (Instruction *Res = foldICmpWithDominatingICmp(I))
return Res;
@@ -7057,6 +7054,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
return nullptr;
}
+ if (Instruction *Res = foldICmpWithConstant(I))
+ return Res;
+
// Do this after checking for min/max to prevent infinite looping.
if (Instruction *Res = foldICmpWithZero(I))
return Res;
diff --git a/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll b/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
index c6d6e916b2c786..0c445b3be3740b 100644
--- a/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
@@ -10,7 +10,8 @@ define i64 @dont_break_minmax_i64(i64 %conv, i64 %conv2) {
; CHECK-SAME: (i64 [[CONV:%.*]], i64 [[CONV2:%.*]]) {
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV2]]
; CHECK-NEXT: [[SHR:%.*]] = ashr i64 [[MUL]], 4
-; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = call i64 @llvm.smin.i64(i64 [[SHR]], i64 348731)
+; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i64 [[MUL]], 5579712
+; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i64 [[SHR]], i64 348731
; CHECK-NEXT: ret i64 [[SPEC_SELECT_I]]
;
%mul = mul nsw i64 %conv, %conv2
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
index 1b8efe4351c6dc..08a763a50bf958 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
@@ -900,7 +900,7 @@ define i1 @ashrsgt_01_00(i4 %x) {
define i1 @ashrsgt_01_00_multiuse(i4 %x, ptr %p) {
; CHECK-LABEL: @ashrsgt_01_00_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X]], 1
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i1 [[C]]
;
@@ -2442,7 +2442,7 @@ define i1 @ashr_sle_noexact(i8 %x) {
define i1 @ashr_00_00_ashr_extra_use(i8 %x, ptr %ptr) {
; CHECK-LABEL: @ashr_00_00_ashr_extra_use(
; CHECK-NEXT: [[S:%.*]] = ashr exact i8 [[X:%.*]], 3
-; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[S]], 11
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 88
; CHECK-NEXT: store i8 [[S]], ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret i1 [[C]]
;
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 71b4f5a970c2f6..cdbe9c9e4986fd 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -780,7 +780,7 @@ define i1 @ashr_ult_2(i4 %x) {
define i1 @ashr_ult_2_multiuse(i4 %x, ptr %p) {
; CHECK-LABEL: @ashr_ult_2_multiuse(
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT: [[R:%.*]] = icmp ult i4 [[S]], 2
+; CHECK-NEXT: [[R:%.*]] = icmp ult i4 [[X]], 4
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
; CHECK-NEXT: ret i1 [[R]]
;
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
index 73bcee5fb74f0c..055a420ccfc65b 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
@@ -14,11 +14,11 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER16:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8
-; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7
+; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4
+; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 3
; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[N_VEC]], 1
; CHECK-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[PSRCA:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[N_VEC]], 1
@@ -34,18 +34,19 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[TMP5]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
-; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
-; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i16>, ptr [[NEXT_GEP14]], align 2
-; CHECK-NEXT: [[TMP7:%.*]] = sext <8 x i16> [[WIDE_LOAD15]] to <8 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = ashr <8 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: [[TMP10:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP9]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
-; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP10]] to <8 x i16>
-; CHECK-NEXT: store <8 x i16> [[TMP11]], ptr [[NEXT_GEP13]], align 2
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
+; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i16>, ptr [[NEXT_GEP14]], align 2
+; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD15]] to <4 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP8]], <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>
+; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP9]] to <4 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> <i16 32767, i16 32767, i16 32767, i16 32767>
+; CHECK-NEXT: store <4 x i16> [[TMP12]], ptr [[NEXT_GEP13]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER16]]
@@ -61,15 +62,16 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
; CHECK-NEXT: [[PDST_ADDR_04:%.*]] = phi ptr [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_04_PH]], [[WHILE_BODY_PREHEADER16]] ]
; CHECK-NEXT: [[PSRCB_ADDR_03:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_03_PH]], [[WHILE_BODY_PREHEADER16]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRCA_ADDR_05]], i32 2
-; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[PSRCA_ADDR_05]], align 2
-; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[PSRCA_ADDR_05]], align 2
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[PSRCB_ADDR_03]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[PSRCB_ADDR_03]], align 2
-; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP14]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[PSRCB_ADDR_03]], align 2
+; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP15]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15
-; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767)
-; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[SPEC_SELECT_I]] to i16
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 15
+; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i32 [[MUL]], 1073741824
+; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[SHR]] to i16
+; CHECK-NEXT: [[CONV3:%.*]] = select i1 [[CMP4_I]], i16 [[TMP16]], i16 32767
; CHECK-NEXT: [[INCDEC_PTR4]] = getelementptr inbounds i8, ptr [[PDST_ADDR_04]], i32 2
; CHECK-NEXT: store i16 [[CONV3]], ptr [[PDST_ADDR_04]], align 2
; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_06]], -1
diff --git a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll
index 67d721b23d6f00..8559f973f281ac 100644
--- a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll
+++ b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll
@@ -5,7 +5,8 @@ define i32 @testa(i32 %mul) {
; CHECK-LABEL: define i32 @testa(
; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15
-; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767)
+; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i32 [[MUL]], 1073741824
+; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i32 [[SHR]], i32 32767
; CHECK-NEXT: ret i32 [[SPEC_SELECT_I]]
;
%shr = ashr i32 %mul, 15
@@ -19,8 +20,11 @@ define i32 @testb(i32 %mul) {
; CHECK-LABEL: define i32 @testb(
; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CHECK-NEXT: [[SHR102:%.*]] = ashr i32 [[MUL]], 7
-; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128)
-; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127)
+; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[MUL]], 16383
+; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP4_I]], i32 127, i32 -128
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[MUL]], 16384
+; CHECK-NEXT: [[CLEANUP_DEST_SLOT_0_I:%.*]] = icmp ult i32 [[TMP1]], 32768
+; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CLEANUP_DEST_SLOT_0_I]], i32 [[SHR102]], i32 [[RETVAL_0_I]]
; CHECK-NEXT: ret i32 [[SPEC_SELECT_I]]
;
%shr102 = ashr i32 %mul, 7
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
index bd509509c321f8..d41883fb788d3b 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
@@ -279,7 +279,7 @@ define void @loop_with_signed_induction(ptr noundef nonnull align 8 dereferencea
; CHECK-NEXT: [[SUB_PTR_RHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: [[SUB_PTR_SUB_I_I:%.*]] = sub i64 [[SUB_PTR_LHS_CAST_I_I]], [[SUB_PTR_RHS_CAST_I_I]]
; CHECK-NEXT: [[SUB_PTR_DIV_I_I:%.*]] = ashr exact i64 [[SUB_PTR_SUB_I_I]], 3
-; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i64 [[SUB_PTR_DIV_I_I]], 0
+; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i64 [[SUB_PTR_SUB_I_I]], 0
; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
``````````
</details>
https://github.com/llvm/llvm-project/pull/83563
More information about the llvm-commits
mailing list