[llvm] 7910ed2 - [InstCombine] Canonicalise packed-integer-selecting shifts (#162147)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 8 05:58:17 PDT 2025
Author: zGoldthorpe
Date: 2025-10-08T06:58:11-06:00
New Revision: 7910ed22320c5f298c4645ffa9072238c95bc7d6
URL: https://github.com/llvm/llvm-project/commit/7910ed22320c5f298c4645ffa9072238c95bc7d6
DIFF: https://github.com/llvm/llvm-project/commit/7910ed22320c5f298c4645ffa9072238c95bc7d6.diff
LOG: [InstCombine] Canonicalise packed-integer-selecting shifts (#162147)
This patch resolves recent regressions related to [issue
#92891](https://github.com/llvm/llvm-project/issues/92891).
Specifically, it enables reductions of the following form:
```llvm
define i16 @src(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
%upper.shl = shl nuw i32 %upper, 16
%pack = or disjoint i32 %upper.shl, %lower
%mask.bit = and i32 %mask, 16
%sel = lshr i32 %pack, %mask.bit
%trunc = trunc i32 %sel to i16
ret i16 %trunc
}
; =>
define i16 @tgt(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
%mask.bit = and i32 %mask, 16
%mask.bit.z = icmp eq i32 %mask.bit, 0
%sel = select i1 %mask.bit.z, i32 %lower, i32 %upper
%trunc = trunc i32 %sel to i16
ret i16 %trunc
}
```
Alive2 proofs: [gJ9MpP](https://alive2.llvm.org/ce/z/gJ9MpP)
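The intuition behind the fold can be sketched in plain C++. Because `%lower` is known to fit in the low 16 bits (the `or` is `disjoint`) and the shift amount, coming from `and i32 %mask, 16`, is known to be either 0 or 16, the `lshr` plus truncation picks out exactly one of the two packed halves. A minimal standalone model of that equivalence (illustrative only; this is not the InstCombine code itself, and `src`/`tgt` just mirror the names in the commit message):
```cpp
#include <cassert>
#include <cstdint>

// Model of the pattern before the fold.
// Preconditions mirrored from the IR: lower fits in 16 bits,
// and shrAmt is either 0 or 16.
uint16_t src(uint32_t shrAmt, uint32_t upper, uint32_t lower) {
  uint32_t pack = (upper << 16) | lower;        // "or disjoint": halves do not overlap
  return static_cast<uint16_t>(pack >> shrAmt); // shift by 0 or 16, keep the low half
}

// Model of the canonical form after the fold.
uint16_t tgt(uint32_t shrAmt, uint32_t upper, uint32_t lower) {
  return static_cast<uint16_t>(shrAmt == 0 ? lower : upper);
}

int main() {
  // Exhaustive over the two shift amounts the fold handles; spot-check operands.
  for (uint32_t shrAmt : {0u, 16u})
    for (uint32_t upper : {0u, 1u, 0xABCDu, 0xFFFFu})
      for (uint32_t lower : {0u, 1u, 0x1234u, 0xFFFFu})
        assert(src(shrAmt, upper, lower) == tgt(shrAmt, upper, lower));
  return 0;
}
```
This mirrors what the `KnownBits` queries in the patch establish: the known-zero bits of the shift amount guarantee it is either 0 or `ShlAmt`, and the known bits of `%lower` guarantee the packed halves are disjoint.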
Added:
llvm/test/Transforms/InstCombine/fold-selective-shift.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index aa030294ff1e5..127a506e440b7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -60,6 +60,58 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
return true;
}
+/// Let N = 2 * M.
+/// Given an N-bit integer representing a pack of two M-bit integers,
+/// we can select one of the packed integers by right-shifting by either
+/// zero or M and then isolating the lower M bits. Checking that the shift
+/// amount is zero or M is simplest when M is a power of 2. In that case,
+/// the shift can be rewritten as a select on whether the shr amount is nonzero.
+static Value *simplifyShiftSelectingPackedElement(Instruction *I,
+ const APInt &DemandedMask,
+ InstCombinerImpl &IC,
+ unsigned Depth) {
+ assert(I->getOpcode() == Instruction::LShr &&
+ "Only lshr instruction supported");
+
+ uint64_t ShlAmt;
+ Value *Upper, *Lower;
+ if (!match(I->getOperand(0),
+ m_OneUse(m_c_DisjointOr(
+ m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))),
+ m_Value(Lower)))))
+ return nullptr;
+
+ if (!isPowerOf2_64(ShlAmt))
+ return nullptr;
+
+ const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
+ if (DemandedBitWidth > ShlAmt)
+ return nullptr;
+
+ // Check that the demanded upper bits are not shifted out by the shl.
+ if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
+ return nullptr;
+
+ KnownBits KnownLowerBits = IC.computeKnownBits(Lower, I, Depth);
+ if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
+ return nullptr;
+
+ Value *ShrAmt = I->getOperand(1);
+ KnownBits KnownShrBits = IC.computeKnownBits(ShrAmt, I, Depth);
+
+ // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
+ // zero.
+ if (~KnownShrBits.Zero != ShlAmt)
+ return nullptr;
+
+ Value *ShrAmtZ =
+ IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
+ ShrAmt->getName() + ".z");
+ Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
+ Select->takeName(I);
+ return Select;
+}
+
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -798,9 +850,13 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
Known >>= ShiftAmt;
if (ShiftAmt)
Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
- } else {
- llvm::computeKnownBits(I, Known, Q, Depth);
+ break;
}
+ if (Value *V =
+ simplifyShiftSelectingPackedElement(I, DemandedMask, *this, Depth))
+ return V;
+
+ llvm::computeKnownBits(I, Known, Q, Depth);
break;
}
case Instruction::AShr: {
diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
new file mode 100644
index 0000000000000..2b2296541f14a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine %s -S | FileCheck %s
+
+declare void @clobber.i32(i32)
+
+define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %lower.zext, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or disjoint i32 %upper.shl, %lower
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or disjoint i32 %lower, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %lower.zext, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %sel.masked = and i32 %sel, 65535
+ ret i32 %sel.masked
+}
+
+define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %sel.masked = and i32 %sel, 65535
+ ret i32 %sel.masked
+}
+
+define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
+; CHECK-LABEL: define <2 x i16> @selective_shift.v16(
+; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
+; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]]
+; CHECK-NEXT: ret <2 x i16> [[SEL_V]]
+;
+ %upper.zext = zext <2 x i16> %upper to <2 x i32>
+ %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16)
+ %lower.zext = zext <2 x i16> %lower to <2 x i32>
+ %pack = or disjoint <2 x i32> %upper.shl, %lower.zext
+ %mask.bit = and <2 x i32> %mask, splat(i32 16)
+ %sel = lshr <2 x i32> %pack, %mask.bit
+ %trunc = trunc <2 x i32> %sel to <2 x i16>
+ ret <2 x i16> %trunc
+}
+
+define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.wide(
+; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i64
+ %pack = or disjoint i64 %upper.shl, %lower.zext
+ %mask.bit = and i64 %mask, 16
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i16
+ ret i16 %trunc
+}
+
+; narrow zext type blocks fold
+define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.narrow(
+; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i24
+ %upper.shl = shl i24 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i24
+ %pack = or disjoint i24 %upper.shl, %lower.zext
+ %mask.bit = and i24 %mask, 16
+ %sel = lshr i24 %pack, %mask.bit
+ %trunc = trunc i24 %sel to i16
+ ret i16 %trunc
+}
+
+; %lower's upper bits block fold
+define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_norange(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16
+; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or i32 %upper.shl, %lower
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.0(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]])
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]])
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ call void @clobber.i32(i32 %upper.zext)
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ call void @clobber.i32(i32 %lower.zext)
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; multi-use of %pack blocks fold
+define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.1(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]])
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ call void @clobber.i32(i32 %pack)
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; non-truncated use of %sel blocks fold
+define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]])
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ call void @clobber.i32(i32 %sel)
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; bitwidth must be a power of 2 to fold
+define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) {
+; CHECK-LABEL: define i24 @selective_shift_24(
+; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24
+; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24
+; CHECK-NEXT: ret i24 [[TRUNC]]
+;
+ %upper.zext = zext i24 %upper to i48
+ %upper.shl = shl nuw i48 %upper.zext, 24
+ %lower.zext = zext i24 %lower to i48
+ %pack = or disjoint i48 %upper.shl, %lower.zext
+ %mask.bit = and i48 %mask, 24
+ %sel = lshr i48 %pack, %mask.bit
+ %trunc = trunc i48 %sel to i24
+ ret i24 %trunc
+}
+
+define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: ret i32 [[SEL_V]]
+;
+ %upper.zext = zext i32 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 32
+ %lower.zext = zext i32 %lower to i64
+ %pack = or disjoint i64 %upper.shl, %lower.zext
+ %mask.bit = and i64 %mask, 32
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i32
+ ret i32 %trunc
+}
+
+define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32.commute(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: ret i32 [[SEL_V]]
+;
+ %upper.zext = zext i32 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 32
+ %lower.zext = zext i32 %lower to i64
+ %pack = or disjoint i64 %lower.zext, %upper.shl
+ %mask.bit = and i64 %mask, 32
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i32
+ ret i32 %trunc
+}