[llvm] [InstCombine] Fold select of clamped shifts (PR #114797)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 14 06:56:59 PST 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/114797
>From 2b30a761ed28c24feb9a1232eb1cc2b3b4ea6efa Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 4 Nov 2024 13:57:10 +0000
Subject: [PATCH 1/4] [InstCombine] Add select of clamped shifts test coverage
based off #109888
Add baseline tests for removing shift amount clamps which are also bound by a select:
Fold (select (icmp_ugt A, BW-1), (shift X, (and A, C)), Y) --> (select (icmp_ugt A, BW-1), (shift X, A), Y)
Fold (select (icmp_ugt A, BW-1), Y, (shift X, (umin A, C))) --> (select (icmp_ugt A, BW-1), Y, (shift X, A))
---
.../InstCombine/select-shift-clamp.ll | 236 ++++++++++++++++++
1 file changed, 236 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/select-shift-clamp.ll
diff --git a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
new file mode 100644
index 00000000000000..a7b2022819a09c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+declare void @use_i17(i17)
+declare void @use_i32(i32)
+
+; Fold (select (icmp_ugt A, BW-1), (shift X, (and A, C)), FalseVal)
+; --> (select (icmp_ugt A, BW-1), (shift X, A), FalseVal)
+; Fold (select (icmp_ult A, BW), (shift X, (and A, C)), FalseVal)
+; --> (select (icmp_ult A, BW), (shift X, A), FalseVal)
+; iff Pow2 element width and C masks all amt bits.
+
+define i32 @select_ult_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ult_shl_clamp_and_i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ult i32 %a1, 32
+ %m = and i32 %a1, 31
+ %s = shl i32 %a0, %m
+ %r = select i1 %c, i32 %s, i32 %a2
+ ret i32 %r
+}
+
+define i32 @select_ule_ashr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ule_ashr_clamp_and_i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 127
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ule i32 %a1, 31
+ %m = and i32 %a1, 127
+ %s = ashr i32 %a0, %m
+ %r = select i1 %c, i32 %s, i32 %a2
+ ret i32 %r
+}
+
+define i32 @select_ugt_lshr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ugt_lshr_clamp_and_i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
+; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ugt i32 %a1, 31
+ %m = and i32 %a1, 31
+ %s = lshr i32 %a0, %m
+ %r = select i1 %c, i32 %a2, i32 %s
+ ret i32 %r
+}
+
+define i32 @select_uge_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_uge_shl_clamp_and_i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
+; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 63
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp uge i32 %a1, 32
+ %m = and i32 %a1, 63
+ %s = shl i32 %a0, %m
+ %r = select i1 %c, i32 %a2, i32 %s
+ ret i32 %r
+}
+
+; negative test - multiuse
+define i32 @select_ule_ashr_clamp_and_i32_multiuse(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ule_ashr_clamp_and_i32_multiuse(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 127
+; CHECK-NEXT: [[S:%.*]] = ashr i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[S]], i32 [[A2:%.*]]
+; CHECK-NEXT: call void @use_i32(i32 [[S]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ule i32 %a1, 31
+ %m = and i32 %a1, 127
+ %s = ashr i32 %a0, %m
+ %r = select i1 %c, i32 %s, i32 %a2
+ call void @use_i32(i32 %s)
+ ret i32 %r
+}
+
+; negative test - mask doesn't cover all legal amount bits
+define i32 @select_ult_shl_clamp_and_i32_badmask(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ult_shl_clamp_and_i32_badmask(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 28
+; CHECK-NEXT: [[S:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[S]], i32 [[A2:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ult i32 %a1, 32
+ %m = and i32 %a1, 28
+ %s = shl i32 %a0, %m
+ %r = select i1 %c, i32 %s, i32 %a2
+ ret i32 %r
+}
+
+; negative test - non-pow2
+define i17 @select_uge_lshr_clamp_and_i17_nonpow2(i17 %a0, i17 %a1, i17 %a2) {
+; CHECK-LABEL: @select_uge_lshr_clamp_and_i17_nonpow2(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i17 [[A1:%.*]], 16
+; CHECK-NEXT: [[M:%.*]] = and i17 [[A1]], 255
+; CHECK-NEXT: [[S:%.*]] = lshr i17 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i17 [[A2:%.*]], i17 [[S]]
+; CHECK-NEXT: ret i17 [[R]]
+;
+ %c = icmp uge i17 %a1, 17
+ %m = and i17 %a1, 255
+ %s = lshr i17 %a0, %m
+ %r = select i1 %c, i17 %a2, i17 %s
+ ret i17 %r
+}
+
+; Fold (select (icmp_ugt A, BW-1), TrueVal, (shift X, (umin A, C)))
+; --> (select (icmp_ugt A, BW-1), TrueVal, (shift X, A))
+; Fold (select (icmp_ult A, BW), (shift X, (umin A, C)), FalseVal)
+; --> (select (icmp_ult A, BW), (shift X, A), FalseVal)
+; iff C >= BW-1
+
+define i32 @select_ult_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ult_shl_clamp_umin_i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.umin.i32(i32 [[A1]], i32 31)
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ult i32 %a1, 32
+ %m = call i32 @llvm.umin.i32(i32 %a1, i32 31)
+ %s = shl i32 %a0, %m
+ %r = select i1 %c, i32 %s, i32 %a2
+ ret i32 %r
+}
+
+define i17 @select_ule_ashr_clamp_umin_i17(i17 %a0, i17 %a1, i17 %a2) {
+; CHECK-LABEL: @select_ule_ashr_clamp_umin_i17(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i17 [[A1:%.*]], 17
+; CHECK-NEXT: [[M:%.*]] = call i17 @llvm.umin.i17(i17 [[A1]], i17 17)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i17 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i17 [[TMP1]], i17 [[A2:%.*]]
+; CHECK-NEXT: ret i17 [[R]]
+;
+ %c = icmp ule i17 %a1, 16
+ %m = call i17 @llvm.umin.i17(i17 %a1, i17 17)
+ %s = ashr i17 %a0, %m
+ %r = select i1 %c, i17 %s, i17 %a2
+ ret i17 %r
+}
+
+define i32 @select_ugt_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ugt_shl_clamp_umin_i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
+; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.umin.i32(i32 [[A1]], i32 128)
+; CHECK-NEXT: [[S:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[S]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ugt i32 %a1, 31
+ %m = call i32 @llvm.umin.i32(i32 %a1, i32 128)
+ %s = shl i32 %a0, %m
+ %r = select i1 %c, i32 %a2, i32 %s
+ ret i32 %r
+}
+
+define <2 x i32> @select_uge_lshr_clamp_umin_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: @select_uge_lshr_clamp_umin_v2i32(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A1:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A1]], <2 x i32> <i32 63, i32 31>)
+; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A2:%.*]], <2 x i32> [[S]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %c = icmp uge <2 x i32> %a1, <i32 32, i32 32>
+ %m = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a1, <2 x i32> <i32 63, i32 31>)
+ %s = lshr <2 x i32> %a0, %m
+ %r = select <2 x i1> %c, <2 x i32> %a2, <2 x i32> %s
+ ret <2 x i32> %r
+}
+
+; negative test - multiuse
+define i32 @select_ugt_shl_clamp_umin_i32_multiuse(i32 %a0, i32 %a1, i32 %a2) {
+; CHECK-LABEL: @select_ugt_shl_clamp_umin_i32_multiuse(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.umin.i32(i32 [[A1]], i32 128)
+; CHECK-NEXT: [[S:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[S]]
+; CHECK-NEXT: call void @use_i32(i32 [[S]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %c = icmp ugt i32 %a1, 32
+ %m = call i32 @llvm.umin.i32(i32 %a1, i32 128)
+ %s = shl i32 %a0, %m
+ %r = select i1 %c, i32 %a2, i32 %s
+ call void @use_i32(i32 %s)
+ ret i32 %r
+}
+
+; negative test - umin limit doesn't cover all legal amounts
+define i17 @select_uge_lshr_clamp_umin_i17_badlimit(i17 %a0, i17 %a1, i17 %a2) {
+; CHECK-LABEL: @select_uge_lshr_clamp_umin_i17_badlimit(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i17 [[A1:%.*]], 15
+; CHECK-NEXT: [[M:%.*]] = call i17 @llvm.umin.i17(i17 [[A1]], i17 12)
+; CHECK-NEXT: [[S:%.*]] = lshr i17 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i17 [[A2:%.*]], i17 [[S]]
+; CHECK-NEXT: ret i17 [[R]]
+;
+ %c = icmp uge i17 %a1, 16
+ %m = call i17 @llvm.umin.i17(i17 %a1, i17 12)
+ %s = lshr i17 %a0, %m
+ %r = select i1 %c, i17 %a2, i17 %s
+ ret i17 %r
+}
+
+define range(i64 0, -9223372036854775807) <4 x i64> @PR109888(<4 x i64> %0) {
+; CHECK-LABEL: @PR109888(
+; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i64> [[TMP0:%.*]], <i64 64, i64 64, i64 64, i64 64>
+; CHECK-NEXT: [[M:%.*]] = and <4 x i64> [[TMP0]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[M]]
+; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i64> [[TMP2]], <4 x i64> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> [[R]]
+;
+ %c = icmp ult <4 x i64> %0, <i64 64, i64 64, i64 64, i64 64>
+ %m = and <4 x i64> %0, <i64 63, i64 63, i64 63, i64 63>
+ %s = shl nuw <4 x i64> <i64 1, i64 1, i64 1, i64 1>, %m
+ %r = select <4 x i1> %c, <4 x i64> %s, <4 x i64> zeroinitializer
+ ret <4 x i64> %r
+}
>From 08756e3a0fc167ff66d5064847435f41b83e6f38 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 4 Nov 2024 14:09:26 +0000
Subject: [PATCH 2/4] [InstCombine] Fold select of clamped shifts
If we are feeding a shift into a select conditioned by an inbounds check for the shift amount, then we can strip any mask/clamp limit that has been put on the shift amount.
Fold (select (icmp_ugt A, BW-1), (shift X, (and A, C)), Y) --> (select (icmp_ugt A, BW-1), (shift X, A), Y)
Fold (select (icmp_ugt A, BW-1), Y, (shift X, (umin A, C))) --> (select (icmp_ugt A, BW-1), Y, (shift X, A))
Alive2: https://alive2.llvm.org/ce/z/xC6FwD
Fixes #109888
---
.../InstCombine/InstCombineSelect.cpp | 64 +++++++++++++++++++
.../InstCombine/select-shift-clamp.ll | 27 +++-----
2 files changed, 73 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 010b77548c152a..8a9b862a6cc4f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2761,6 +2761,67 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
return nullptr;
}
+static Instruction *foldSelectWithClampedShift(SelectInst &SI,
+ InstCombinerImpl &IC,
+ IRBuilderBase &Builder) {
+ Value *CondVal = SI.getCondition();
+ Value *TrueVal = SI.getTrueValue();
+ Value *FalseVal = SI.getFalseValue();
+ Type *SelType = SI.getType();
+ uint64_t BW = SelType->getScalarSizeInBits();
+
+ auto MatchClampedShift = [&](Value *V, Value *Amt) -> BinaryOperator * {
+ Value *X, *Limit;
+
+ // Fold (select (icmp_ugt A, BW-1), TrueVal, (shift X, (umin A, C)))
+ // --> (select (icmp_ugt A, BW-1), TrueVal, (shift X, A))
+ // Fold (select (icmp_ult A, BW), (shift X, (umin A, C)), FalseVal)
+ // --> (select (icmp_ult A, BW), (shift X, A), FalseVal)
+ // iff C >= BW-1
+ if (match(V, m_OneUse(m_Shift(m_Value(X),
+ m_UMin(m_Specific(Amt), m_Value(Limit)))))) {
+ KnownBits KnownLimit = IC.computeKnownBits(Limit, 0, &SI);
+ if (KnownLimit.getMinValue().uge(BW - 1))
+ return cast<BinaryOperator>(V);
+ }
+
+ // Fold (select (icmp_ugt A, BW-1), (shift X, (and A, C)), FalseVal)
+ // --> (select (icmp_ugt A, BW-1), (shift X, A), FalseVal)
+ // Fold (select (icmp_ult A, BW), (shift X, (and A, C)), FalseVal)
+ // --> (select (icmp_ult A, BW), (shift X, A), FalseVal)
+ // iff Pow2 element width and C masks all amt bits.
+ if (isPowerOf2_64(BW) &&
+ match(V, m_OneUse(m_Shift(m_Value(X),
+ m_And(m_Specific(Amt), m_Value(Limit)))))) {
+ KnownBits KnownLimit = IC.computeKnownBits(Limit, 0, &SI);
+ if (KnownLimit.countMinTrailingOnes() >= Log2_64(BW))
+ return cast<BinaryOperator>(V);
+ }
+
+ return nullptr;
+ };
+
+ Value *Amt;
+ if (match(CondVal, m_SpecificICmp(ICmpInst::ICMP_UGT, m_Value(Amt),
+ m_SpecificInt(BW - 1)))) {
+ if (BinaryOperator *ShiftI = MatchClampedShift(FalseVal, Amt))
+ return SelectInst::Create(
+ CondVal, TrueVal,
+ Builder.CreateBinOp(ShiftI->getOpcode(), ShiftI->getOperand(0), Amt));
+ }
+
+ if (match(CondVal, m_SpecificICmp(ICmpInst::ICMP_ULT, m_Value(Amt),
+ m_SpecificInt(BW)))) {
+ if (BinaryOperator *ShiftI = MatchClampedShift(TrueVal, Amt))
+ return SelectInst::Create(
+ CondVal,
+ Builder.CreateBinOp(ShiftI->getOpcode(), ShiftI->getOperand(0), Amt),
+ FalseVal);
+ }
+
+ return nullptr;
+}
+
static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
if (!FI)
@@ -3871,6 +3932,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *I = foldSelectExtConst(SI))
return I;
+ if (Instruction *I = foldSelectWithClampedShift(SI, *this, Builder))
+ return I;
+
if (Instruction *I = foldSelectWithSRem(SI, *this, Builder))
return I;
diff --git a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
index a7b2022819a09c..9be6e71b67a351 100644
--- a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
+++ b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
@@ -13,8 +13,7 @@ declare void @use_i32(i32)
define i32 @select_ult_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ult_shl_clamp_and_i32(
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
-; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 31
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -28,8 +27,7 @@ define i32 @select_ult_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
define i32 @select_ule_ashr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ule_ashr_clamp_and_i32(
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
-; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 127
-; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -43,8 +41,7 @@ define i32 @select_ule_ashr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
define i32 @select_ugt_lshr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ugt_lshr_clamp_and_i32(
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
-; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 31
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -58,8 +55,7 @@ define i32 @select_ugt_lshr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
define i32 @select_uge_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_uge_shl_clamp_and_i32(
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
-; CHECK-NEXT: [[M:%.*]] = and i32 [[A1]], 63
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -129,8 +125,7 @@ define i17 @select_uge_lshr_clamp_and_i17_nonpow2(i17 %a0, i17 %a1, i17 %a2) {
define i32 @select_ult_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ult_shl_clamp_umin_i32(
; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
-; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.umin.i32(i32 [[A1]], i32 31)
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -144,8 +139,7 @@ define i32 @select_ult_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
define i17 @select_ule_ashr_clamp_umin_i17(i17 %a0, i17 %a1, i17 %a2) {
; CHECK-LABEL: @select_ule_ashr_clamp_umin_i17(
; CHECK-NEXT: [[C:%.*]] = icmp ult i17 [[A1:%.*]], 17
-; CHECK-NEXT: [[M:%.*]] = call i17 @llvm.umin.i17(i17 [[A1]], i17 17)
-; CHECK-NEXT: [[TMP1:%.*]] = ashr i17 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i17 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i17 [[TMP1]], i17 [[A2:%.*]]
; CHECK-NEXT: ret i17 [[R]]
;
@@ -159,8 +153,7 @@ define i17 @select_ule_ashr_clamp_umin_i17(i17 %a0, i17 %a1, i17 %a2) {
define i32 @select_ugt_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ugt_shl_clamp_umin_i32(
; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
-; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.umin.i32(i32 [[A1]], i32 128)
-; CHECK-NEXT: [[S:%.*]] = shl i32 [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[S:%.*]] = shl i32 [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[S]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -174,8 +167,7 @@ define i32 @select_ugt_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
define <2 x i32> @select_uge_lshr_clamp_umin_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: @select_uge_lshr_clamp_umin_v2i32(
; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A1:%.*]], <i32 31, i32 31>
-; CHECK-NEXT: [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A1]], <2 x i32> <i32 63, i32 31>)
-; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[A0:%.*]], [[M]]
+; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[A0:%.*]], [[A1]]
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A2:%.*]], <2 x i32> [[S]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
@@ -223,8 +215,7 @@ define i17 @select_uge_lshr_clamp_umin_i17_badlimit(i17 %a0, i17 %a1, i17 %a2) {
define range(i64 0, -9223372036854775807) <4 x i64> @PR109888(<4 x i64> %0) {
; CHECK-LABEL: @PR109888(
; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i64> [[TMP0:%.*]], <i64 64, i64 64, i64 64, i64 64>
-; CHECK-NEXT: [[M:%.*]] = and <4 x i64> [[TMP0]], <i64 63, i64 63, i64 63, i64 63>
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[M]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP0]]
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i64> [[TMP2]], <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> [[R]]
;
>From 60bfef209a625dec8cd4611d9182963a89188c84 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 4 Nov 2024 15:24:55 +0000
Subject: [PATCH 3/4] Add missing freeze
---
.../InstCombine/InstCombineSelect.cpp | 13 ++++++---
.../InstCombine/select-shift-clamp.ll | 27 ++++++++++++-------
2 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8a9b862a6cc4f8..093e724de3ed3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2804,19 +2804,24 @@ static Instruction *foldSelectWithClampedShift(SelectInst &SI,
Value *Amt;
if (match(CondVal, m_SpecificICmp(ICmpInst::ICMP_UGT, m_Value(Amt),
m_SpecificInt(BW - 1)))) {
- if (BinaryOperator *ShiftI = MatchClampedShift(FalseVal, Amt))
+ if (BinaryOperator *ShiftI = MatchClampedShift(FalseVal, Amt)) {
+ Amt = Builder.CreateFreeze(Amt);
return SelectInst::Create(
- CondVal, TrueVal,
+ Builder.CreateICmpUGT(Amt, cast<Instruction>(CondVal)->getOperand(1)),
+ TrueVal,
Builder.CreateBinOp(ShiftI->getOpcode(), ShiftI->getOperand(0), Amt));
+ }
}
if (match(CondVal, m_SpecificICmp(ICmpInst::ICMP_ULT, m_Value(Amt),
m_SpecificInt(BW)))) {
- if (BinaryOperator *ShiftI = MatchClampedShift(TrueVal, Amt))
+ if (BinaryOperator *ShiftI = MatchClampedShift(TrueVal, Amt)) {
+ Amt = Builder.CreateFreeze(Amt);
return SelectInst::Create(
- CondVal,
+ Builder.CreateICmpULT(Amt, cast<Instruction>(CondVal)->getOperand(1)),
Builder.CreateBinOp(ShiftI->getOpcode(), ShiftI->getOperand(0), Amt),
FalseVal);
+ }
}
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
index 9be6e71b67a351..fd03d847c8a33a 100644
--- a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
+++ b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
@@ -12,8 +12,9 @@ declare void @use_i32(i32)
define i32 @select_ult_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ult_shl_clamp_and_i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[A1:%.*]] = freeze i32 [[A3:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1]], 32
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -26,8 +27,9 @@ define i32 @select_ult_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
define i32 @select_ule_ashr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ule_ashr_clamp_and_i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[A1:%.*]] = freeze i32 [[A3:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1]], 32
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -40,8 +42,9 @@ define i32 @select_ule_ashr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
define i32 @select_ugt_lshr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ugt_lshr_clamp_and_i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
+; CHECK-NEXT: [[A1:%.*]] = freeze i32 [[A3:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1]], 31
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -54,8 +57,9 @@ define i32 @select_ugt_lshr_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
define i32 @select_uge_shl_clamp_and_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_uge_shl_clamp_and_i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
+; CHECK-NEXT: [[A1:%.*]] = freeze i32 [[A3:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1]], 31
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -124,8 +128,9 @@ define i17 @select_uge_lshr_clamp_and_i17_nonpow2(i17 %a0, i17 %a1, i17 %a2) {
define i32 @select_ult_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ult_shl_clamp_umin_i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1:%.*]], 32
+; CHECK-NEXT: [[A1:%.*]] = freeze i32 [[A3:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A1]], 32
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[A2:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -138,8 +143,9 @@ define i32 @select_ult_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
define i17 @select_ule_ashr_clamp_umin_i17(i17 %a0, i17 %a1, i17 %a2) {
; CHECK-LABEL: @select_ule_ashr_clamp_umin_i17(
-; CHECK-NEXT: [[C:%.*]] = icmp ult i17 [[A1:%.*]], 17
+; CHECK-NEXT: [[A1:%.*]] = freeze i17 [[A3:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = ashr i17 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult i17 [[A1]], 17
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i17 [[TMP1]], i17 [[A2:%.*]]
; CHECK-NEXT: ret i17 [[R]]
;
@@ -152,8 +158,9 @@ define i17 @select_ule_ashr_clamp_umin_i17(i17 %a0, i17 %a1, i17 %a2) {
define i32 @select_ugt_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: @select_ugt_shl_clamp_umin_i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1:%.*]], 31
+; CHECK-NEXT: [[A1:%.*]] = freeze i32 [[A3:%.*]]
; CHECK-NEXT: [[S:%.*]] = shl i32 [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A1]], 31
; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A2:%.*]], i32 [[S]]
; CHECK-NEXT: ret i32 [[R]]
;
@@ -166,8 +173,9 @@ define i32 @select_ugt_shl_clamp_umin_i32(i32 %a0, i32 %a1, i32 %a2) {
define <2 x i32> @select_uge_lshr_clamp_umin_v2i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: @select_uge_lshr_clamp_umin_v2i32(
-; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A1:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[A1:%.*]] = freeze <2 x i32> [[A3:%.*]]
; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[A0:%.*]], [[A1]]
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A1]], <i32 31, i32 31>
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A2:%.*]], <2 x i32> [[S]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
@@ -214,8 +222,9 @@ define i17 @select_uge_lshr_clamp_umin_i17_badlimit(i17 %a0, i17 %a1, i17 %a2) {
define range(i64 0, -9223372036854775807) <4 x i64> @PR109888(<4 x i64> %0) {
; CHECK-LABEL: @PR109888(
-; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i64> [[TMP0:%.*]], <i64 64, i64 64, i64 64, i64 64>
+; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i64> [[TMP1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP0]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i64> [[TMP0]], <i64 64, i64 64, i64 64, i64 64>
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i64> [[TMP2]], <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> [[R]]
;
>From 79b1bc8522cc3bfa6361c5146ab8b808e7cb0db6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Thu, 14 Nov 2024 14:56:35 +0000
Subject: [PATCH 4/4] Replace m_And() with a general
SimplifyMultipleUseDemandedBits call
---
.../Transforms/InstCombine/InstCombineSelect.cpp | 15 +++++++++------
.../Transforms/InstCombine/select-shift-clamp.ll | 6 +++---
2 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 093e724de3ed3c..2b353d5fd69d65 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2772,6 +2772,7 @@ static Instruction *foldSelectWithClampedShift(SelectInst &SI,
auto MatchClampedShift = [&](Value *V, Value *Amt) -> BinaryOperator * {
Value *X, *Limit;
+ Instruction *I;
// Fold (select (icmp_ugt A, BW-1), TrueVal, (shift X, (umin A, C)))
// --> (select (icmp_ugt A, BW-1), TrueVal, (shift X, A))
@@ -2789,13 +2790,15 @@ static Instruction *foldSelectWithClampedShift(SelectInst &SI,
// --> (select (icmp_ugt A, BW-1), (shift X, A), FalseVal)
// Fold (select (icmp_ult A, BW), (shift X, (and A, C)), FalseVal)
// --> (select (icmp_ult A, BW), (shift X, A), FalseVal)
- // iff Pow2 element width and C masks all amt bits.
+ // iff Pow2 element width we just demand the amt mask bits.
if (isPowerOf2_64(BW) &&
- match(V, m_OneUse(m_Shift(m_Value(X),
- m_And(m_Specific(Amt), m_Value(Limit)))))) {
- KnownBits KnownLimit = IC.computeKnownBits(Limit, 0, &SI);
- if (KnownLimit.countMinTrailingOnes() >= Log2_64(BW))
- return cast<BinaryOperator>(V);
+ match(V, m_OneUse(m_Shift(m_Value(X), m_Instruction(I))))) {
+ KnownBits Known(BW);
+ APInt DemandedBits = APInt::getLowBitsSet(BW, Log2_64(BW));
+ if (Value *NewAmt = IC.SimplifyMultipleUseDemandedBits(
+ I, DemandedBits, Known, /*Depth=*/0,
+ IC.getSimplifyQuery().getWithInstruction(I)))
+ return Amt == NewAmt ? cast<BinaryOperator>(V) : nullptr;
}
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
index fd03d847c8a33a..31c60b58ed65ef 100644
--- a/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
+++ b/llvm/test/Transforms/InstCombine/select-shift-clamp.ll
@@ -175,7 +175,7 @@ define <2 x i32> @select_uge_lshr_clamp_umin_v2i32(<2 x i32> %a0, <2 x i32> %a1,
; CHECK-LABEL: @select_uge_lshr_clamp_umin_v2i32(
; CHECK-NEXT: [[A1:%.*]] = freeze <2 x i32> [[A3:%.*]]
; CHECK-NEXT: [[S:%.*]] = lshr <2 x i32> [[A0:%.*]], [[A1]]
-; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A1]], <i32 31, i32 31>
+; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i32> [[A1]], splat (i32 31)
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[A2:%.*]], <2 x i32> [[S]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
@@ -223,8 +223,8 @@ define i17 @select_uge_lshr_clamp_umin_i17_badlimit(i17 %a0, i17 %a1, i17 %a2) {
define range(i64 0, -9223372036854775807) <4 x i64> @PR109888(<4 x i64> %0) {
; CHECK-LABEL: @PR109888(
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i64> [[TMP1:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP0]]
-; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i64> [[TMP0]], <i64 64, i64 64, i64 64, i64 64>
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <4 x i64> splat (i64 1), [[TMP0]]
+; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i64> [[TMP0]], splat (i64 64)
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i64> [[TMP2]], <4 x i64> zeroinitializer
; CHECK-NEXT: ret <4 x i64> [[R]]
;
More information about the llvm-commits
mailing list