[llvm] 8eeabf6 - [AArch64] Add funnel shift lowering for SelectionDAG
Tuan Chuong Goh via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 28 03:57:13 PDT 2023
Author: Tuan Chuong Goh
Date: 2023-07-28T11:56:25+01:00
New Revision: 8eeabf674ca0f71aaf8120ebe387ea8e8f4e686c
URL: https://github.com/llvm/llvm-project/commit/8eeabf674ca0f71aaf8120ebe387ea8e8f4e686c
DIFF: https://github.com/llvm/llvm-project/commit/8eeabf674ca0f71aaf8120ebe387ea8e8f4e686c.diff
LOG: [AArch64] Add funnel shift lowering for SelectionDAG
Consider FSHR legal if shift amount is constant
Lower FSHL to FSHR if shift amount is constant
Differential Revision: https://reviews.llvm.org/D155565
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/test/CodeGen/AArch64/arm64-long-shift.ll
llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
llvm/test/CodeGen/AArch64/logic-shift.ll
llvm/test/CodeGen/AArch64/nontemporal-load.ll
llvm/test/CodeGen/AArch64/pr55201.ll
llvm/test/CodeGen/AArch64/setcc-fsh.ll
llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 05e7ef92833937..7547eae576a24f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -556,6 +556,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ // Lowering Funnel Shifts to EXTR
+ setOperationAction(ISD::FSHR, MVT::i32, Custom);
+ setOperationAction(ISD::FSHR, MVT::i64, Custom);
+ setOperationAction(ISD::FSHL, MVT::i32, Custom);
+ setOperationAction(ISD::FSHL, MVT::i64, Custom);
+
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
@@ -2352,7 +2358,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
- MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
@@ -5797,6 +5802,30 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+// Custom lowering for scalar funnel shifts (ISD::FSHL / ISD::FSHR).
+//
+// A funnel shift by a constant amount is kept as, or rewritten to, an
+// ISD::FSHR node whose amount lies in [1, BitWidth), which is the form the
+// fshr instruction-selection patterns accept as an immediate. A funnel shift
+// by a non-constant amount returns an empty SDValue so that it is expanded.
+//
+// Funnel-shift amounts are interpreted modulo the bit width, therefore:
+//   fshl(a, b, 0) == a                      fshr(a, b, 0) == b
+//   fshl(a, b, c) == fshr(a, b, BitWidth - c)   for 0 < c < BitWidth
+// The zero cases must be folded here: converting fshl-by-0 into
+// fshr-by-BitWidth would both exceed the immediate range and select the wrong
+// operand.
+static SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) {
+  SDValue Shifts = Op.getOperand(2);
+  auto *ShiftNo = dyn_cast<ConstantSDNode>(Shifts);
+  // Only constant shift amounts are handled; everything else is expanded.
+  if (!ShiftNo)
+    return SDValue();
+
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  const unsigned BitWidth = VT.getFixedSizeInBits();
+  // Normalise the amount to [0, BitWidth) so the arithmetic below cannot wrap
+  // and the resulting immediate is always encodable.
+  const unsigned NewShiftNo = ShiftNo->getZExtValue() % BitWidth;
+
+  if (Op.getOpcode() == ISD::FSHL) {
+    // fshl by (a multiple of) the bit width is just the first operand.
+    if (NewShiftNo == 0)
+      return Op.getOperand(0);
+
+    // Convert FSHL to the equivalent FSHR.
+    return DAG.getNode(
+        ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+        DAG.getConstant(BitWidth - NewShiftNo, DL, Shifts.getValueType()));
+  }
+
+  if (Op.getOpcode() == ISD::FSHR) {
+    // fshr by (a multiple of) the bit width is just the second operand.
+    if (NewShiftNo == 0)
+      return Op.getOperand(1);
+
+    // Already in canonical form: treat the node as legal.
+    if (ShiftNo->getZExtValue() == NewShiftNo)
+      return Op;
+
+    // Re-emit with the normalised shift amount.
+    return DAG.getNode(
+        ISD::FSHR, DL, VT, Op.getOperand(0), Op.getOperand(1),
+        DAG.getConstant(NewShiftNo, DL, Shifts.getValueType()));
+  }
+
+  return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6106,6 +6135,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return Result;
}
+ case ISD::FSHL:
+ case ISD::FSHR:
+ return LowerFunnelShift(Op, DAG);
}
}
@@ -16678,70 +16710,6 @@ static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(C, DL, MVT::i32));
}
-/// An EXTR instruction is made up of two shifts, ORed together. This helper
-/// searches for and classifies those shifts.
-static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
- bool &FromHi) {
- if (N.getOpcode() == ISD::SHL)
- FromHi = false;
- else if (N.getOpcode() == ISD::SRL)
- FromHi = true;
- else
- return false;
-
- if (!isa<ConstantSDNode>(N.getOperand(1)))
- return false;
-
- ShiftAmount = N->getConstantOperandVal(1);
- Src = N->getOperand(0);
- return true;
-}
-
-/// EXTR instruction extracts a contiguous chunk of bits from two existing
-/// registers viewed as a high/low pair. This function looks for the pattern:
-/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
-/// with an EXTR. Can't quite be done in TableGen because the two immediates
-/// aren't independent.
-static SDValue tryCombineToEXTR(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- assert(N->getOpcode() == ISD::OR && "Unexpected root");
-
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- SDValue LHS;
- uint32_t ShiftLHS = 0;
- bool LHSFromHi = false;
- if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
- return SDValue();
-
- SDValue RHS;
- uint32_t ShiftRHS = 0;
- bool RHSFromHi = false;
- if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
- return SDValue();
-
- // If they're both trying to come from the high part of the register, they're
- // not really an EXTR.
- if (LHSFromHi == RHSFromHi)
- return SDValue();
-
- if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
- return SDValue();
-
- if (LHSFromHi) {
- std::swap(LHS, RHS);
- std::swap(ShiftLHS, ShiftRHS);
- }
-
- return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
- DAG.getConstant(ShiftRHS, DL, MVT::i64));
-}
-
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64TargetLowering &TLI) {
EVT VT = N->getValueType(0);
@@ -16925,10 +16893,6 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
- // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
- if (SDValue Res = tryCombineToEXTR(N, DCI))
- return Res;
-
if (SDValue Res = tryCombineToBSL(N, DCI, TLI))
return Res;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index aca45f113e7366..984b875e370c5a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -165,9 +165,6 @@ enum NodeType : unsigned {
// Floating point comparison
FCMP,
- // Scalar extract
- EXTR,
-
// Scalar-to-vector duplication
DUP,
DUPLANE8,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d39fd69f9e0ee4..578a7645da21c9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2985,7 +2985,7 @@ class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm,
multiclass ExtractImm<string asm> {
def Wrri : BaseExtractImm<GPR32, imm0_31, asm,
[(set GPR32:$Rd,
- (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
+ (fshr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
let Inst{31} = 0;
let Inst{22} = 0;
// imm<5> must be zero.
@@ -2993,7 +2993,7 @@ multiclass ExtractImm<string asm> {
}
def Xrri : BaseExtractImm<GPR64, imm0_63, asm,
[(set GPR64:$Rd,
- (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
+ (fshr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
let Inst{31} = 1;
let Inst{22} = 1;
diff --git a/llvm/test/CodeGen/AArch64/arm64-long-shift.ll b/llvm/test/CodeGen/AArch64/arm64-long-shift.ll
index 5a1375f82ab681..a0072a6e196305 100644
--- a/llvm/test/CodeGen/AArch64/arm64-long-shift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-long-shift.ll
@@ -60,7 +60,7 @@ define i128 @ashr_mask(i128 %r, i128 %s) nounwind readnone {
; CHECK-NEXT: and x10, x2, #0x3f
; CHECK-NEXT: eor x10, x10, #0x3f
; CHECK-NEXT: lsl x9, x9, x10
-; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: orr x0, x9, x8
; CHECK-NEXT: asr x1, x1, x2
; CHECK-NEXT: ret
%mask = and i128 %s, 63
@@ -93,7 +93,7 @@ define i128 @lshr_mask(i128 %r, i128 %s) nounwind readnone {
; CHECK-NEXT: and x10, x2, #0x3f
; CHECK-NEXT: eor x10, x10, #0x3f
; CHECK-NEXT: lsl x9, x9, x10
-; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: orr x0, x9, x8
; CHECK-NEXT: lsr x1, x1, x2
; CHECK-NEXT: ret
%mask = and i128 %s, 63
diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
index 74a1c28032d899..6edb75995ba1f2 100644
--- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
@@ -137,11 +137,8 @@ define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero_i256:
; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x2, x0
-; CHECK-NEXT: extr x9, x3, x2, #47
-; CHECK-NEXT: extr x10, x1, x0, #47
-; CHECK-NEXT: extr x8, x8, x1, #47
-; CHECK-NEXT: orr x9, x10, x9
-; CHECK-NEXT: orr x8, x8, x9
+; CHECK-NEXT: orr x8, x1, x8
+; CHECK-NEXT: orr x8, x8, x3, lsl #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
index ba63c4433a2a38..24187abc7e994d 100644
--- a/llvm/test/CodeGen/AArch64/logic-shift.ll
+++ b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -690,8 +690,8 @@ define i64 @mix_logic_shl(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define i32 @or_fshl_commute0(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshl_commute0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #27
-; CHECK-NEXT: orr w0, w8, w1, lsl #5
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: extr w0, w8, w0, #27
; CHECK-NEXT: ret
%or1 = or i32 %x, %y
%sh1 = shl i32 %or1, 5
@@ -703,8 +703,8 @@ define i32 @or_fshl_commute0(i32 %x, i32 %y) {
define i64 @or_fshl_commute1(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshl_commute1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror x8, x0, #29
-; CHECK-NEXT: orr x0, x8, x1, lsl #35
+; CHECK-NEXT: orr w8, w1, w0
+; CHECK-NEXT: extr x0, x8, x0, #29
; CHECK-NEXT: ret
%or1 = or i64 %y, %x
%sh1 = shl i64 %or1, 35
@@ -762,8 +762,8 @@ define i32 @or_fshl_wrong_shift(i32 %x, i32 %y) {
define i64 @or_fshr_commute0(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshr_commute0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror x8, x0, #24
-; CHECK-NEXT: orr x0, x8, x1, lsr #24
+; CHECK-NEXT: orr x8, x0, x1
+; CHECK-NEXT: extr x0, x0, x8, #24
; CHECK-NEXT: ret
%or1 = or i64 %x, %y
%sh1 = shl i64 %x, 40
@@ -775,8 +775,8 @@ define i64 @or_fshr_commute0(i64 %x, i64 %y) {
define i32 @or_fshr_commute1(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshr_commute1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #29
-; CHECK-NEXT: orr w0, w8, w1, lsr #29
+; CHECK-NEXT: orr w8, w1, w0
+; CHECK-NEXT: extr w0, w0, w8, #29
; CHECK-NEXT: ret
%or1 = or i32 %y, %x
%sh1 = shl i32 %x, 3
diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
index 5ccf6b562b42b3..f8f7de48ca800b 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -528,27 +528,25 @@ define <4 x i63> @test_ldnp_v4i63(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i63:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x0, #16]
-; CHECK-NEXT: extr x12, x9, x8, #63
+; CHECK-NEXT: ldp x10, x12, [x0, #16]
+; CHECK-NEXT: extr x11, x9, x8, #63
; CHECK-NEXT: and x0, x8, #0x7fffffffffffffff
; CHECK-NEXT: extr x9, x10, x9, #62
-; CHECK-NEXT: extr x10, x11, x10, #61
-; CHECK-NEXT: and x1, x12, #0x7fffffffffffffff
+; CHECK-NEXT: extr x3, x12, x10, #61
+; CHECK-NEXT: and x1, x11, #0x7fffffffffffffff
; CHECK-NEXT: and x2, x9, #0x7fffffffffffffff
-; CHECK-NEXT: and x3, x10, #0x7fffffffffffffff
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v4i63:
; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldp x9, x8, [x0]
-; CHECK-BE-NEXT: ldp x10, x11, [x0, #16]
-; CHECK-BE-NEXT: extr x9, x9, x8, #61
-; CHECK-BE-NEXT: extr x8, x8, x10, #62
-; CHECK-BE-NEXT: extr x10, x10, x11, #63
-; CHECK-BE-NEXT: and x3, x11, #0x7fffffffffffffff
-; CHECK-BE-NEXT: and x0, x9, #0x7fffffffffffffff
-; CHECK-BE-NEXT: and x1, x8, #0x7fffffffffffffff
-; CHECK-BE-NEXT: and x2, x10, #0x7fffffffffffffff
+; CHECK-BE-NEXT: ldp x8, x9, [x0, #16]
+; CHECK-BE-NEXT: ldp x11, x10, [x0]
+; CHECK-BE-NEXT: and x3, x9, #0x7fffffffffffffff
+; CHECK-BE-NEXT: extr x12, x10, x8, #62
+; CHECK-BE-NEXT: extr x8, x8, x9, #63
+; CHECK-BE-NEXT: extr x0, x11, x10, #61
+; CHECK-BE-NEXT: and x1, x12, #0x7fffffffffffffff
+; CHECK-BE-NEXT: and x2, x8, #0x7fffffffffffffff
; CHECK-BE-NEXT: ret
%lv = load <4 x i63>, ptr %A, align 8, !nontemporal !0
ret <4 x i63> %lv
diff --git a/llvm/test/CodeGen/AArch64/pr55201.ll b/llvm/test/CodeGen/AArch64/pr55201.ll
index 44c42d38365657..bc2d9653e5a320 100644
--- a/llvm/test/CodeGen/AArch64/pr55201.ll
+++ b/llvm/test/CodeGen/AArch64/pr55201.ll
@@ -4,8 +4,8 @@
define i32 @f(i32 %x) {
; CHECK-LABEL: f:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #27
-; CHECK-NEXT: orr w8, w8, #0x20
+; CHECK-NEXT: orr w8, w0, #0x1
+; CHECK-NEXT: extr w8, w8, w0, #27
; CHECK-NEXT: and w0, w8, #0xffffffe1
; CHECK-NEXT: ret
%or1 = or i32 %x, 1
diff --git a/llvm/test/CodeGen/AArch64/setcc-fsh.ll b/llvm/test/CodeGen/AArch64/setcc-fsh.ll
index aaf3a6c178135d..08bfe282703ff6 100644
--- a/llvm/test/CodeGen/AArch64/setcc-fsh.ll
+++ b/llvm/test/CodeGen/AArch64/setcc-fsh.ll
@@ -224,8 +224,8 @@ define i1 @fshl_xor_eq_0(i32 %x, i32 %y) {
define i1 @fshl_or_sgt_0(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_sgt_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #30
-; CHECK-NEXT: orr w8, w8, w1, lsl #2
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: extr w8, w8, w0, #30
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
@@ -238,8 +238,8 @@ define i1 @fshl_or_sgt_0(i32 %x, i32 %y) {
define i1 @fshl_or_ne_2(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_or_ne_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: ror w8, w0, #30
-; CHECK-NEXT: orr w8, w8, w1, lsl #2
+; CHECK-NEXT: orr w8, w0, w1
+; CHECK-NEXT: extr w8, w8, w0, #30
; CHECK-NEXT: cmp w8, #2
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
index 710ff9ac701de0..906aa153d73001 100644
--- a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
@@ -167,29 +167,28 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-NEXT: ubfx x12, x9, #3, #5
; ALL-NEXT: add x8, x8, x12
; ALL-NEXT: and x9, x9, #0x7
+; ALL-NEXT: mvn w13, w9
; ALL-NEXT: stp q0, q0, [sp, #32]
; ALL-NEXT: stp x10, x11, [sp, #16]
-; ALL-NEXT: eor x11, x9, #0x3f
; ALL-NEXT: str q1, [sp]
-; ALL-NEXT: ldp x10, x13, [x8, #8]
-; ALL-NEXT: ldr x12, [x8, #24]
-; ALL-NEXT: ldr x8, [x8]
+; ALL-NEXT: ldp x11, x10, [x8, #8]
+; ALL-NEXT: ldr x12, [x8]
+; ALL-NEXT: ldr x8, [x8, #24]
+; ALL-NEXT: lsr x15, x11, x9
+; ALL-NEXT: lsl x11, x11, #1
; ALL-NEXT: lsl x14, x10, #1
; ALL-NEXT: lsr x10, x10, x9
-; ALL-NEXT: lsl x15, x12, #1
-; ALL-NEXT: lsl x14, x14, x11
-; ALL-NEXT: lsl x11, x15, x11
-; ALL-NEXT: mvn w15, w9
-; ALL-NEXT: lsr x8, x8, x9
; ALL-NEXT: lsr x12, x12, x9
-; ALL-NEXT: lsr x9, x13, x9
-; ALL-NEXT: orr x8, x8, x14
-; ALL-NEXT: orr x9, x9, x11
-; ALL-NEXT: lsl x11, x13, #1
-; ALL-NEXT: lsl x11, x11, x15
-; ALL-NEXT: orr x10, x10, x11
-; ALL-NEXT: stp x9, x12, [x2, #16]
-; ALL-NEXT: stp x8, x10, [x2]
+; ALL-NEXT: lsr x9, x8, x9
+; ALL-NEXT: lsl x8, x8, #1
+; ALL-NEXT: lsl x11, x11, x13
+; ALL-NEXT: lsl x8, x8, x13
+; ALL-NEXT: orr x11, x11, x12
+; ALL-NEXT: orr x8, x8, x10
+; ALL-NEXT: lsl x10, x14, x13
+; ALL-NEXT: orr x10, x15, x10
+; ALL-NEXT: stp x8, x9, [x2, #16]
+; ALL-NEXT: stp x11, x10, [x2]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -212,27 +211,25 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-NEXT: sub x8, x8, x12
; ALL-NEXT: and x9, x9, #0x7
; ALL-NEXT: mvn w12, w9
-; ALL-NEXT: eor x14, x9, #0x3f
; ALL-NEXT: stp q0, q0, [sp]
; ALL-NEXT: stp x10, x11, [sp, #48]
; ALL-NEXT: str q1, [sp, #32]
-; ALL-NEXT: ldp x11, x10, [x8, #8]
-; ALL-NEXT: ldr x13, [x8]
-; ALL-NEXT: ldr x8, [x8, #24]
-; ALL-NEXT: lsr x15, x11, #1
-; ALL-NEXT: lsl x11, x11, x9
-; ALL-NEXT: lsr x16, x10, #1
-; ALL-NEXT: lsr x12, x15, x12
-; ALL-NEXT: lsr x15, x13, #1
-; ALL-NEXT: lsr x16, x16, x14
-; ALL-NEXT: lsr x14, x15, x14
-; ALL-NEXT: lsl x13, x13, x9
+; ALL-NEXT: ldp x10, x11, [x8]
+; ALL-NEXT: ldp x13, x8, [x8, #16]
+; ALL-NEXT: lsr x14, x10, #1
+; ALL-NEXT: lsl x10, x10, x9
+; ALL-NEXT: lsl x15, x11, x9
+; ALL-NEXT: lsr x11, x11, #1
+; ALL-NEXT: lsr x14, x14, x12
+; ALL-NEXT: lsr x11, x11, x12
; ALL-NEXT: lsl x8, x8, x9
-; ALL-NEXT: lsl x9, x10, x9
-; ALL-NEXT: orr x11, x11, x14
-; ALL-NEXT: orr x8, x8, x16
-; ALL-NEXT: orr x9, x9, x12
-; ALL-NEXT: stp x13, x11, [x2]
+; ALL-NEXT: lsl x9, x13, x9
+; ALL-NEXT: lsr x13, x13, #1
+; ALL-NEXT: orr x14, x15, x14
+; ALL-NEXT: lsr x13, x13, x12
+; ALL-NEXT: orr x9, x9, x11
+; ALL-NEXT: orr x8, x8, x13
+; ALL-NEXT: stp x10, x14, [x2]
; ALL-NEXT: stp x9, x8, [x2, #16]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
@@ -257,26 +254,24 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-NEXT: add x8, x8, x10
; ALL-NEXT: and x9, x9, #0x7
; ALL-NEXT: stp x12, x12, [sp, #48]
-; ALL-NEXT: eor x14, x9, #0x3f
; ALL-NEXT: stp x12, x12, [sp, #32]
; ALL-NEXT: mvn w12, w9
-; ALL-NEXT: ldp x10, x11, [x8, #8]
-; ALL-NEXT: ldr x13, [x8, #24]
-; ALL-NEXT: ldr x8, [x8]
-; ALL-NEXT: lsl x16, x10, #1
+; ALL-NEXT: ldp x10, x11, [x8, #16]
+; ALL-NEXT: ldp x8, x13, [x8]
+; ALL-NEXT: lsl x14, x10, #1
+; ALL-NEXT: lsr x10, x10, x9
; ALL-NEXT: lsl x15, x11, #1
-; ALL-NEXT: lsl x16, x16, x14
-; ALL-NEXT: lsl x12, x15, x12
+; ALL-NEXT: asr x11, x11, x9
+; ALL-NEXT: lsl x15, x15, x12
+; ALL-NEXT: lsl x14, x14, x12
+; ALL-NEXT: orr x10, x15, x10
; ALL-NEXT: lsl x15, x13, #1
-; ALL-NEXT: lsl x14, x15, x14
-; ALL-NEXT: lsr x11, x11, x9
-; ALL-NEXT: asr x13, x13, x9
+; ALL-NEXT: lsl x12, x15, x12
; ALL-NEXT: lsr x8, x8, x9
-; ALL-NEXT: lsr x9, x10, x9
-; ALL-NEXT: orr x11, x11, x14
-; ALL-NEXT: orr x8, x8, x16
-; ALL-NEXT: orr x9, x9, x12
-; ALL-NEXT: stp x11, x13, [x2, #16]
+; ALL-NEXT: lsr x9, x13, x9
+; ALL-NEXT: orr x8, x12, x8
+; ALL-NEXT: orr x9, x9, x14
+; ALL-NEXT: stp x10, x11, [x2, #16]
; ALL-NEXT: stp x8, x9, [x2]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 73b0d6ca95e9dd..c4dd9a1eb1a6cb 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -947,27 +947,29 @@ define void @zext_v8i8_to_v8i20_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: mov w9, v1.s[1]
-; CHECK-BE-NEXT: mov w11, v0.s[1]
-; CHECK-BE-NEXT: mov w13, v1.s[2]
-; CHECK-BE-NEXT: fmov w14, s1
+; CHECK-BE-NEXT: mov w11, v1.s[2]
+; CHECK-BE-NEXT: fmov w12, s1
+; CHECK-BE-NEXT: mov w13, v0.s[1]
; CHECK-BE-NEXT: mov w15, v0.s[2]
-; CHECK-BE-NEXT: fmov w16, s0
; CHECK-BE-NEXT: mov w10, v1.s[3]
+; CHECK-BE-NEXT: mov w14, v0.s[3]
; CHECK-BE-NEXT: lsl x9, x9, #40
-; CHECK-BE-NEXT: mov w12, v0.s[3]
-; CHECK-BE-NEXT: lsl x11, x11, #40
-; CHECK-BE-NEXT: orr x9, x9, x14, lsl #60
-; CHECK-BE-NEXT: orr x11, x11, x16, lsl #60
-; CHECK-BE-NEXT: orr x9, x9, x13, lsl #20
-; CHECK-BE-NEXT: orr x11, x11, x15, lsl #20
-; CHECK-BE-NEXT: lsr w13, w14, #4
-; CHECK-BE-NEXT: lsr w14, w16, #4
+; CHECK-BE-NEXT: orr x9, x9, x12, lsl #60
+; CHECK-BE-NEXT: lsr x12, x12, #4
+; CHECK-BE-NEXT: orr x9, x9, x11, lsl #20
+; CHECK-BE-NEXT: fmov w11, s0
+; CHECK-BE-NEXT: lsl x13, x13, #40
+; CHECK-BE-NEXT: lsr x9, x9, #16
+; CHECK-BE-NEXT: bfi x9, x12, #48, #4
; CHECK-BE-NEXT: strh w10, [x1, #18]
-; CHECK-BE-NEXT: extr x9, x13, x9, #16
-; CHECK-BE-NEXT: strh w12, [x1, #8]
-; CHECK-BE-NEXT: extr x10, x14, x11, #16
+; CHECK-BE-NEXT: orr x13, x13, x11, lsl #60
+; CHECK-BE-NEXT: lsr x11, x11, #4
+; CHECK-BE-NEXT: orr x13, x13, x15, lsl #20
+; CHECK-BE-NEXT: strh w14, [x1, #8]
+; CHECK-BE-NEXT: lsr x12, x13, #16
; CHECK-BE-NEXT: stur x9, [x1, #10]
-; CHECK-BE-NEXT: str x10, [x1], #64
+; CHECK-BE-NEXT: bfi x12, x11, #48, #4
+; CHECK-BE-NEXT: str x12, [x1], #64
; CHECK-BE-NEXT: b.ne .LBB10_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
@@ -2701,29 +2703,29 @@ define void @zext_v8i8_to_v8i33_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: ushll v2.2d, v1.2s, #0
; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0
; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BE-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-BE-NEXT: mov x9, v3.d[1]
+; CHECK-BE-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-BE-NEXT: fmov x10, d3
; CHECK-BE-NEXT: mov x11, v0.d[1]
-; CHECK-BE-NEXT: fmov x12, d0
-; CHECK-BE-NEXT: mov x13, v1.d[1]
-; CHECK-BE-NEXT: mov x14, v2.d[1]
+; CHECK-BE-NEXT: fmov x13, d0
+; CHECK-BE-NEXT: mov x12, v1.d[1]
+; CHECK-BE-NEXT: strb w9, [x1, #32]
; CHECK-BE-NEXT: orr x10, x9, x10, lsl #33
; CHECK-BE-NEXT: fmov x15, d1
-; CHECK-BE-NEXT: strb w9, [x1, #32]
-; CHECK-BE-NEXT: fmov x16, d2
+; CHECK-BE-NEXT: mov x14, v2.d[1]
; CHECK-BE-NEXT: lsl x11, x11, #2
-; CHECK-BE-NEXT: lsl x13, x13, #4
-; CHECK-BE-NEXT: orr x12, x11, x12, lsl #35
-; CHECK-BE-NEXT: lsl x14, x14, #6
-; CHECK-BE-NEXT: orr x15, x13, x15, lsl #37
+; CHECK-BE-NEXT: lsl x12, x12, #4
+; CHECK-BE-NEXT: orr x13, x11, x13, lsl #35
; CHECK-BE-NEXT: extr x10, x11, x10, #8
-; CHECK-BE-NEXT: orr x11, x14, x16, lsl #39
-; CHECK-BE-NEXT: extr x12, x13, x12, #8
-; CHECK-BE-NEXT: extr x9, x14, x15, #8
-; CHECK-BE-NEXT: extr x11, xzr, x11, #8
-; CHECK-BE-NEXT: stp x12, x10, [x1, #16]
-; CHECK-BE-NEXT: stp x11, x9, [x1], #128
+; CHECK-BE-NEXT: fmov x11, d2
+; CHECK-BE-NEXT: orr x15, x12, x15, lsl #37
+; CHECK-BE-NEXT: lsl x14, x14, #6
+; CHECK-BE-NEXT: extr x9, x12, x13, #8
+; CHECK-BE-NEXT: orr x11, x14, x11, lsl #39
+; CHECK-BE-NEXT: extr x12, x14, x15, #8
+; CHECK-BE-NEXT: lsr x11, x11, #8
+; CHECK-BE-NEXT: stp x9, x10, [x1, #16]
+; CHECK-BE-NEXT: stp x11, x12, [x1], #128
; CHECK-BE-NEXT: b.ne .LBB22_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
More information about the llvm-commits
mailing list