[llvm] EnableOptimizeLogicalImm for ARM (PR #165177)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 26 18:21:23 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/165177
From 09b7784be3957a35fb141e047910940c108d18c9 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sat, 25 Oct 2025 12:23:52 -0400
Subject: [PATCH] Update ARMISelLowering.cpp
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 218 +++++++++++++-----
llvm/test/CodeGen/ARM/funnel-shift-rot.ll | 5 +-
...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 56 +++--
llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll | 153 ++++++++----
llvm/test/CodeGen/Thumb/bic_imm.ll | 33 ++-
5 files changed, 338 insertions(+), 127 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 313ae3d68fb83..4e9453c609ad7 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -118,6 +118,7 @@ using namespace llvm;
#define DEBUG_TYPE "arm-isel"
STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
STATISTIC(NumConstpoolPromoted,
@@ -128,6 +129,12 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
+static cl::opt<bool>
+EnableOptimizeLogicalImm("arm-enable-logical-imm", cl::Hidden,
+ cl::desc("Enable ARM logical imm instruction "
+ "optimization"),
+ cl::init(true));
+
static cl::opt<bool> EnableConstpoolPromotion(
"arm-promote-constant", cl::Hidden,
cl::desc("Enable / disable promotion of unnamed_addr constants into "
@@ -20138,6 +20145,109 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
}
+static bool isLegalLogicalImmediate(unsigned Imm, const ARMSubtarget *Subtarget) {
+ // ARM mode: an 8-bit value rotated right by an even amount.
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ // Thumb1 only has 8-bit unsigned immediate.
+ return Imm <= 255;
+}
+
+static bool optimizeLogicalImm(SDValue Op, unsigned Imm, const APInt &Demanded,
+ TargetLowering::TargetLoweringOpt &TLO,
+ unsigned NewOpc, const ARMSubtarget *Subtarget) {
+ unsigned OldImm = Imm, NewImm;
+
+ // Return if the immediate is already all zeros, all ones, or already legal.
+ if (Imm == 0 || Imm == ~0U || isLegalLogicalImmediate(Imm, Subtarget))
+ return false;
+
+ // BIC (and Thumb2 ORN) can instead use the complement of the immediate.
+ if ((Op.getOpcode() == ISD::AND || (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) && isLegalLogicalImmediate(~Imm, Subtarget))
+ return false;
+
+ unsigned DemandedBits = Demanded.getZExtValue();
+
+ // Clear bits that are not demanded.
+ Imm &= DemandedBits;
+
+ // Try to turn the immediate into a legal one by filling in the
+ // non-demanded bits. ARM mode accepts an 8-bit value rotated right by an
+ // even amount (0, 2, ..., 30); Thumb2 accepts a modified immediate (an
+ // 8-bit value, rotated or replicated across bytes); Thumb1 accepts 0-255.
+ unsigned NonDemandedBits = ~DemandedBits;
+
+ // Try filling with 0
+ NewImm = Imm & DemandedBits;
+ if (isLegalLogicalImmediate(NewImm, Subtarget) ||
+ ((Op.getOpcode() == ISD::AND ||
+ (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+ isLegalLogicalImmediate(~NewImm, Subtarget))) {
+ ++NumOptimizedImms;
+ } else {
+ // Try filling with 1
+ NewImm = Imm | NonDemandedBits;
+ if (isLegalLogicalImmediate(NewImm, Subtarget) ||
+ ((Op.getOpcode() == ISD::AND ||
+ (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+ isLegalLogicalImmediate(~NewImm, Subtarget))) {
+ ++NumOptimizedImms;
+ } else {
+ return false;
+ }
+ }
+
+ (void)OldImm;
+ assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
+ "demanded bits should never be altered");
+ assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
+
+ // Create the new constant immediate node.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue New;
+
+ // If the new constant immediate is all-zeros or all-ones, let the target
+ // independent DAG combine optimize this node.
+ if (NewImm == 0 || NewImm == ~0U) {
+ New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
+ TLO.DAG.getConstant(NewImm, DL, VT));
+ // Otherwise, create a machine node so that target independent DAG combine
+ // doesn't undo this optimization.
+ } else {
+ // Use NewImm directly if encodable; otherwise try BIC/ORN with ~NewImm.
+ if (isLegalLogicalImmediate(NewImm, Subtarget)) {
+ SDValue EncConst = TLO.DAG.getTargetConstant(NewImm, DL, VT);
+ New = SDValue(
+ TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
+ 0);
+ } else if ((Op.getOpcode() == ISD::AND ||
+ (Subtarget->isThumb2() && Op.getOpcode() == ISD::OR)) &&
+ isLegalLogicalImmediate(~NewImm, Subtarget)) {
+
+ if (Op.getOpcode() == ISD::OR) {
+ // ORN
+ NewOpc = ARM::t2ORNri;
+ } else {
+ // AND -> BIC
+ NewOpc = Subtarget->isThumb()
+ ? Subtarget->isThumb2() ? ARM::t2BICri : ARM::tBIC
+ : ARM::BICri;
+ }
+ SDValue EncConst = TLO.DAG.getTargetConstant(~NewImm, DL, VT);
+ New = SDValue(
+ TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst),
+ 0);
+ } else {
+ return false;
+ }
+ }
+
+ return TLO.CombineTo(Op, New);
+}
+
bool ARMTargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
@@ -20146,18 +20256,19 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
if (!TLO.LegalOps)
return false;
- // Only optimize AND for now.
- if (Op.getOpcode() != ISD::AND)
+ if (!EnableOptimizeLogicalImm)
return false;
EVT VT = Op.getValueType();
-
- // Ignore vectors.
if (VT.isVector())
return false;
assert(VT == MVT::i32 && "Unexpected integer type");
+ // Exit early if we demand all bits.
+ if (DemandedBits.popcount() == 32)
+ return false;
+
// Make sure the RHS really is a constant.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
@@ -20165,59 +20276,62 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
unsigned Mask = C->getZExtValue();
- unsigned Demanded = DemandedBits.getZExtValue();
- unsigned ShrunkMask = Mask & Demanded;
- unsigned ExpandedMask = Mask | ~Demanded;
-
- // If the mask is all zeros, let the target-independent code replace the
- // result with zero.
- if (ShrunkMask == 0)
- return false;
-
- // If the mask is all ones, erase the AND. (Currently, the target-independent
- // code won't do this, so we have to do it explicitly to avoid an infinite
- // loop in obscure cases.)
- if (ExpandedMask == ~0U)
- return TLO.CombineTo(Op, Op.getOperand(0));
-
- auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
- return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
- };
- auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
- if (NewMask == Mask)
- return true;
- SDLoc DL(Op);
- SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
- SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
- return TLO.CombineTo(Op, NewOp);
- };
-
- // Prefer uxtb mask.
- if (IsLegalMask(0xFF))
- return UseMask(0xFF);
+ // For Thumb1, keep the existing preference for uxtb and uxth masks.
+ if (Subtarget->isThumb1Only() && Op.getOpcode() == ISD::AND) {
+ unsigned Demanded = DemandedBits.getZExtValue();
+ unsigned ShrunkMask = Mask & Demanded;
+ unsigned ExpandedMask = Mask | ~Demanded;
- // Prefer uxth mask.
- if (IsLegalMask(0xFFFF))
- return UseMask(0xFFFF);
+ // If the mask is all zeros, let the target-independent code replace the
+ // result with zero.
+ if (ShrunkMask == 0)
+ return false;
- // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
- // FIXME: Prefer a contiguous sequence of bits for other optimizations.
- if (ShrunkMask < 256)
- return UseMask(ShrunkMask);
+ // If the mask is all ones, erase the AND. (Currently, the
+ // target-independent code won't do this, so we have to do it explicitly to
+ // avoid an infinite loop in obscure cases.)
+ if (ExpandedMask == ~0U)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+ return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+ };
+ auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+ if (NewMask == Mask)
+ return true;
+ SDLoc DL(Op);
+ SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
+ };
- // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
- // FIXME: Prefer a contiguous sequence of bits for other optimizations.
- if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
- return UseMask(ExpandedMask);
+ if (IsLegalMask(0xFF))
+ return UseMask(0xFF);
+ if (IsLegalMask(0xFFFF))
+ return UseMask(0xFFFF);
+ }
- // Potential improvements:
- //
- // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
- // We could try to prefer Thumb1 immediates which can be lowered to a
- // two-instruction sequence.
- // We could try to recognize more legal ARM/Thumb2 immediates here.
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ default:
+ return false;
+ case ISD::AND:
+ NewOpc = Subtarget->isThumb()
+ ? Subtarget->isThumb2() ? ARM::t2ANDri : ARM::tAND
+ : ARM::ANDri;
+ break;
+ case ISD::OR:
+ NewOpc = Subtarget->isThumb()
+ ? Subtarget->isThumb2() ? ARM::t2ORRri : ARM::tORR
+ : ARM::ORRri;
+ break;
+ case ISD::XOR:
+ NewOpc = Subtarget->isThumb()
+ ? Subtarget->isThumb2() ? ARM::t2EORri : ARM::tEOR
+ : ARM::EORri;
+ break;
+ }
- return false;
+ return optimizeLogicalImm(Op, Mask, DemandedBits, TLO, NewOpc, Subtarget);
}
bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
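
To illustrate the approach, here is a minimal standalone sketch of the candidate search in optimizeLogicalImm (simplified: isLegalARMModeImm is an illustrative stand-in for ARM_AM::getSOImmVal, the Thumb1/Thumb2 encodings and the BIC/ORN complement check are omitted):

  #include <cstdint>
  #include <optional>

  // Simplified stand-in for ARM_AM::getSOImmVal: an ARM-mode immediate is
  // encodable if rotating it left by some even amount fits in 8 bits.
  static bool isLegalARMModeImm(uint32_t Imm) {
    for (unsigned Rot = 0; Rot < 32; Rot += 2) {
      uint32_t Rotated = Rot ? ((Imm << Rot) | (Imm >> (32 - Rot))) : Imm;
      if (Rotated <= 0xFF)
        return true;
    }
    return false;
  }

  // Mirrors the fill-with-zeros / fill-with-ones search: bits outside
  // Demanded are free, so try both extremes and keep whichever candidate
  // is encodable. std::nullopt means "leave the constant alone".
  static std::optional<uint32_t> shrinkImm(uint32_t Imm, uint32_t Demanded) {
    if (Imm == 0 || Imm == ~0u || isLegalARMModeImm(Imm))
      return std::nullopt;                  // already cheap, nothing to do
    uint32_t Zeros = Imm & Demanded;        // clear the non-demanded bits
    if (isLegalARMModeImm(Zeros))
      return Zeros;
    uint32_t Ones = Zeros | ~Demanded;      // set the non-demanded bits
    if (isLegalARMModeImm(Ones))
      return Ones;
    return std::nullopt;
  }

For example, the uxth mask 0xFFFF is not encodable in either ARM or Thumb2 mode, but when only bit 15 is demanded the fill-with-zeros candidate 0x8000 is, which is what turns the uxth in scalar_i16_signbit_eq below into "and r0, r0, #32768".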
diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
index a1b6847d623d0..6f34a5fd00314 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
@@ -19,7 +19,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtb r1, r0
+; CHECK-NEXT: and r1, r0, #224
; CHECK-NEXT: lsl r0, r0, #3
; CHECK-NEXT: orr r0, r0, r1, lsr #5
; CHECK-NEXT: bx lr
@@ -161,8 +161,7 @@ define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtb r1, r0
-; CHECK-NEXT: lsr r1, r1, #3
+; CHECK-NEXT: ubfx r1, r0, #3, #5
; CHECK-NEXT: orr r0, r1, r0, lsl #5
; CHECK-NEXT: bx lr
%f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
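
A concrete example from rotl_i8_const_shift above: the mask on the rotated-right half used to be 0xFF (hence the uxtb). 0xFF is already a legal immediate, so the new hook declines and the generic ShrinkDemandedConstant is free to clear the bits that do not feed the following lsr #5 (the low five bits of the mask, by my reading; the exact demanded mask is inferred, not stated in the patch):

  0xFF & ~0x1F = 0xE0 = 224

which is why the uxtb becomes "and r1, r0, #224".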
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 7cc623fb0a616..a21ac8944d7ad 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -21,9 +21,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; ARM-LABEL: scalar_i8_signbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxtb r1, r1
-; ARM-NEXT: lsl r0, r0, r1
+; ARM-NEXT: mov r2, #128
+; ARM-NEXT: and r0, r2, r0, lsl r1
; ARM-NEXT: mov r1, #1
-; ARM-NEXT: uxtb r0, r0
; ARM-NEXT: eor r0, r1, r0, lsr #7
; ARM-NEXT: bx lr
;
@@ -42,7 +42,7 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; THUMB78-NEXT: uxtb r1, r1
; THUMB78-NEXT: lsls r0, r1
; THUMB78-NEXT: movs r1, #1
-; THUMB78-NEXT: uxtb r0, r0
+; THUMB78-NEXT: and r0, r0, #128
; THUMB78-NEXT: eor.w r0, r1, r0, lsr #7
; THUMB78-NEXT: bx lr
%t0 = lshr i8 128, %y
@@ -122,9 +122,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; ARM-LABEL: scalar_i16_signbit_eq:
; ARM: @ %bb.0:
; ARM-NEXT: uxth r1, r1
-; ARM-NEXT: lsl r0, r0, r1
+; ARM-NEXT: mov r2, #32768
+; ARM-NEXT: and r0, r2, r0, lsl r1
; ARM-NEXT: mov r1, #1
-; ARM-NEXT: uxth r0, r0
; ARM-NEXT: eor r0, r1, r0, lsr #15
; ARM-NEXT: bx lr
;
@@ -144,7 +144,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; THUMB78-NEXT: uxth r1, r1
; THUMB78-NEXT: lsls r0, r1
; THUMB78-NEXT: movs r1, #1
-; THUMB78-NEXT: uxth r0, r0
+; THUMB78-NEXT: and r0, r0, #32768
; THUMB78-NEXT: eor.w r0, r1, r0, lsr #15
; THUMB78-NEXT: bx lr
%t0 = lshr i16 32768, %y
@@ -862,21 +862,35 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; ARM-LABEL: scalar_i8_signbit_ne:
-; ARM: @ %bb.0:
-; ARM-NEXT: uxtb r1, r1
-; ARM-NEXT: lsl r0, r0, r1
-; ARM-NEXT: uxtb r0, r0
-; ARM-NEXT: lsr r0, r0, #7
-; ARM-NEXT: bx lr
+; ARM6-LABEL: scalar_i8_signbit_ne:
+; ARM6: @ %bb.0:
+; ARM6-NEXT: uxtb r1, r1
+; ARM6-NEXT: mov r2, #128
+; ARM6-NEXT: and r0, r2, r0, lsl r1
+; ARM6-NEXT: lsr r0, r0, #7
+; ARM6-NEXT: bx lr
;
-; THUMB-LABEL: scalar_i8_signbit_ne:
-; THUMB: @ %bb.0:
-; THUMB-NEXT: uxtb r1, r1
-; THUMB-NEXT: lsls r0, r1
-; THUMB-NEXT: uxtb r0, r0
-; THUMB-NEXT: lsrs r0, r0, #7
-; THUMB-NEXT: bx lr
+; ARM78-LABEL: scalar_i8_signbit_ne:
+; ARM78: @ %bb.0:
+; ARM78-NEXT: uxtb r1, r1
+; ARM78-NEXT: lsl r0, r0, r1
+; ARM78-NEXT: ubfx r0, r0, #7, #1
+; ARM78-NEXT: bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_ne:
+; THUMB6: @ %bb.0:
+; THUMB6-NEXT: uxtb r1, r1
+; THUMB6-NEXT: lsls r0, r1
+; THUMB6-NEXT: uxtb r0, r0
+; THUMB6-NEXT: lsrs r0, r0, #7
+; THUMB6-NEXT: bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_ne:
+; THUMB78: @ %bb.0:
+; THUMB78-NEXT: uxtb r1, r1
+; THUMB78-NEXT: lsls r0, r1
+; THUMB78-NEXT: ubfx r0, r0, #7, #1
+; THUMB78-NEXT: bx lr
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
%res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
@@ -1051,3 +1065,5 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
%res = icmp eq i8 %t1, 1 ; should be comparing with 0
ret i1 %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; THUMB: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll b/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
index 4b0419577cdf0..7a93267fcc390 100644
--- a/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
+++ b/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefixes=CHECK,T2
; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefixes=CHECK,T2
; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s --check-prefixes=CHECK,T1
@@ -13,11 +14,21 @@
; Test sdiv i16
define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f0
-; CHECK: movs r1, #2
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f0:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r1, #2
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: bx lr
+;
+; V6M-LABEL: f0:
+; V6M: @ %bb.0: @ %entry
+; V6M-NEXT: uxth r1, r0
+; V6M-NEXT: lsrs r1, r1, #15
+; V6M-NEXT: adds r0, r0, r1
+; V6M-NEXT: sxth r0, r0
+; V6M-NEXT: asrs r0, r0, #1
+; V6M-NEXT: bx lr
entry:
%0 = sdiv i16 %F, 2
@@ -26,10 +37,19 @@ entry:
; Same as above, but now with i32
define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f1
-; CHECK: movs r1, #4
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r1, #4
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+;
+; V6M-LABEL: f1:
+; V6M: @ %bb.0: @ %entry
+; V6M-NEXT: asrs r1, r0, #31
+; V6M-NEXT: lsrs r1, r1, #30
+; V6M-NEXT: adds r0, r0, r1
+; V6M-NEXT: asrs r0, r0, #2
+; V6M-NEXT: bx lr
entry:
%div = sdiv i32 %F, 4
@@ -38,10 +58,18 @@ entry:
; The immediate is not a power of 2, so we expect a sdiv.
define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f2
-; CHECK: movs r1, #5
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f2:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: movs r1, #5
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+;
+; V6M-LABEL: f2:
+; V6M: @ %bb.0: @ %entry
+; V6M-NEXT: push {r7, lr}
+; V6M-NEXT: movs r1, #5
+; V6M-NEXT: bl __divsi3
+; V6M-NEXT: pop {r7, pc}
entry:
%div = sdiv i32 %F, 5
@@ -51,8 +79,28 @@ entry:
; Try a larger power of 2 immediate: immediates larger than
; 128 don't give any code size savings.
define dso_local i32 @f3(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f3
-; CHECK-NOT: sdiv
+; T2-LABEL: f3:
+; T2: @ %bb.0: @ %entry
+; T2-NEXT: asrs r1, r0, #31
+; T2-NEXT: add.w r0, r0, r1, lsr #24
+; T2-NEXT: asrs r0, r0, #8
+; T2-NEXT: bx lr
+;
+; T1-LABEL: f3:
+; T1: @ %bb.0: @ %entry
+; T1-NEXT: asrs r1, r0, #31
+; T1-NEXT: lsrs r1, r1, #24
+; T1-NEXT: adds r0, r0, r1
+; T1-NEXT: asrs r0, r0, #8
+; T1-NEXT: bx lr
+;
+; V6M-LABEL: f3:
+; V6M: @ %bb.0: @ %entry
+; V6M-NEXT: asrs r1, r0, #31
+; V6M-NEXT: lsrs r1, r1, #24
+; V6M-NEXT: adds r0, r0, r1
+; V6M-NEXT: asrs r0, r0, #8
+; V6M-NEXT: bx lr
entry:
%div = sdiv i32 %F, 256
ret i32 %div
@@ -65,20 +113,32 @@ attributes #0 = { minsize norecurse nounwind optsize readnone }
; the sdiv to sdiv, but to the faster instruction sequence.
define dso_local signext i16 @f4(i16 signext %F) {
-; T2-LABEL: f4
-; T2: uxth r1, r0
-; T2-NEXT: add.w r0, r0, r1, lsr #15
-; T2-NEXT: sxth r0, r0
-; T2-NEXT: asrs r0, r0, #1
-; T2-NEXT: bx lr
-
-; T1-LABEL: f4
-; T1: uxth r1, r0
-; T1-NEXT: lsrs r1, r1, #15
-; T1-NEXT: adds r0, r0, r1
-; T1-NEXT: sxth r0, r0
-; T1-NEXT: asrs r0, r0, #1
-; T1-NEXT: bx lr
+; T2-LABEL: f4:
+; T2: @ %bb.0: @ %entry
+; T2-NEXT: and r1, r0, #32768
+; T2-NEXT: add.w r0, r0, r1, lsr #15
+; T2-NEXT: sxth r0, r0
+; T2-NEXT: asrs r0, r0, #1
+; T2-NEXT: bx lr
+;
+; T1-LABEL: f4:
+; T1: @ %bb.0: @ %entry
+; T1-NEXT: uxth r1, r0
+; T1-NEXT: lsrs r1, r1, #15
+; T1-NEXT: adds r0, r0, r1
+; T1-NEXT: sxth r0, r0
+; T1-NEXT: asrs r0, r0, #1
+; T1-NEXT: bx lr
+;
+; V6M-LABEL: f4:
+; V6M: @ %bb.0: @ %entry
+; V6M-NEXT: uxth r1, r0
+; V6M-NEXT: lsrs r1, r1, #15
+; V6M-NEXT: adds r0, r0, r1
+; V6M-NEXT: sxth r0, r0
+; V6M-NEXT: asrs r0, r0, #1
+; V6M-NEXT: bx lr
+
entry:
%0 = sdiv i16 %F, 2
@@ -86,18 +146,29 @@ entry:
}
define dso_local i32 @f5(i32 %F) {
-; T2-LABEL: f5
-; T2: asrs r1, r0, #31
-; T2-NEXT: add.w r0, r0, r1, lsr #30
-; T2-NEXT: asrs r0, r0, #2
-; T2-NEXT: bx lr
-
-; T1-LABEL: f5
-; T1: asrs r1, r0, #31
-; T1-NEXT: lsrs r1, r1, #30
-; T1-NEXT: adds r0, r0, r1
-; T1-NEXT: asrs r0, r0, #2
-; T1-NEXT: bx lr
+; T2-LABEL: f5:
+; T2: @ %bb.0: @ %entry
+; T2-NEXT: asrs r1, r0, #31
+; T2-NEXT: add.w r0, r0, r1, lsr #30
+; T2-NEXT: asrs r0, r0, #2
+; T2-NEXT: bx lr
+;
+; T1-LABEL: f5:
+; T1: @ %bb.0: @ %entry
+; T1-NEXT: asrs r1, r0, #31
+; T1-NEXT: lsrs r1, r1, #30
+; T1-NEXT: adds r0, r0, r1
+; T1-NEXT: asrs r0, r0, #2
+; T1-NEXT: bx lr
+;
+; V6M-LABEL: f5:
+; V6M: @ %bb.0: @ %entry
+; V6M-NEXT: asrs r1, r0, #31
+; V6M-NEXT: lsrs r1, r1, #30
+; V6M-NEXT: adds r0, r0, r1
+; V6M-NEXT: asrs r0, r0, #2
+; V6M-NEXT: bx lr
+
entry:
%div = sdiv i32 %F, 4
diff --git a/llvm/test/CodeGen/Thumb/bic_imm.ll b/llvm/test/CodeGen/Thumb/bic_imm.ll
index 741b2cf8db2e3..7257891c7a116 100644
--- a/llvm/test/CodeGen/Thumb/bic_imm.ll
+++ b/llvm/test/CodeGen/Thumb/bic_imm.ll
@@ -39,14 +39,19 @@ entry:
define void @truncated(i16 %a, ptr %p) {
; CHECK-T1-LABEL: truncated:
; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: movs r2, #128
-; CHECK-T1-NEXT: bics r0, r2
-; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: ldr r2, .LCPI2_0
+; CHECK-T1-NEXT: ands r2, r0
+; CHECK-T1-NEXT: strh r2, [r1]
; CHECK-T1-NEXT: bx lr
+; CHECK-T1-NEXT: .p2align 2
+; CHECK-T1-NEXT: @ %bb.1:
+; CHECK-T1-NEXT: .LCPI2_0:
+; CHECK-T1-NEXT: .long 65407 @ 0xff7f
;
; CHECK-T2-LABEL: truncated:
; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: bic r0, r0, #128
+; CHECK-T2-NEXT: movw r2, #65407
+; CHECK-T2-NEXT: ands r0, r2
; CHECK-T2-NEXT: strh r0, [r1]
; CHECK-T2-NEXT: bx lr
%and = and i16 %a, -129
@@ -57,14 +62,19 @@ define void @truncated(i16 %a, ptr %p) {
define void @truncated_neg2(i16 %a, ptr %p) {
; CHECK-T1-LABEL: truncated_neg2:
; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: movs r2, #1
-; CHECK-T1-NEXT: bics r0, r2
-; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: ldr r2, .LCPI3_0
+; CHECK-T1-NEXT: ands r2, r0
+; CHECK-T1-NEXT: strh r2, [r1]
; CHECK-T1-NEXT: bx lr
+; CHECK-T1-NEXT: .p2align 2
+; CHECK-T1-NEXT: @ %bb.1:
+; CHECK-T1-NEXT: .LCPI3_0:
+; CHECK-T1-NEXT: .long 65534 @ 0xfffe
;
; CHECK-T2-LABEL: truncated_neg2:
; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: bic r0, r0, #1
+; CHECK-T2-NEXT: movw r2, #65534
+; CHECK-T2-NEXT: ands r0, r2
; CHECK-T2-NEXT: strh r0, [r1]
; CHECK-T2-NEXT: bx lr
%and = and i16 %a, -2
@@ -76,13 +86,14 @@ define void @truncated_neg256(i16 %a, ptr %p) {
; CHECK-T1-LABEL: truncated_neg256:
; CHECK-T1: @ %bb.0:
; CHECK-T1-NEXT: movs r2, #255
-; CHECK-T1-NEXT: bics r0, r2
-; CHECK-T1-NEXT: strh r0, [r1]
+; CHECK-T1-NEXT: lsls r2, r2, #8
+; CHECK-T1-NEXT: ands r2, r0
+; CHECK-T1-NEXT: strh r2, [r1]
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: truncated_neg256:
; CHECK-T2: @ %bb.0:
-; CHECK-T2-NEXT: bic r0, r0, #255
+; CHECK-T2-NEXT: and r0, r0, #65280
; CHECK-T2-NEXT: strh r0, [r1]
; CHECK-T2-NEXT: bx lr
%and = and i16 %a, -256