[llvm] [AArch64] Replace 64-bit MADD with [SU]MADDL when possible (PR #135926)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 00:48:09 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Yuri Gribov (yugr)
This PR adds a MIR peephole optimization that converts 64-bit MADDs to [SU]MADDLs where possible. The same conversion is already done at the ISel stage, but ISel has limited visibility and does not handle operands defined in other blocks (which may happen due to LICM, conditional statements, etc.).
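To illustrate the cross-block case, here is a hypothetical source example of mine (not taken from the patch or its tests): once LICM hoists one of the zero-extensions into the loop preheader, block-local ISel no longer sees both extensions next to the multiply-add, while a MIR-level peephole can still look through the defs:

```cpp
#include <cstdint>

// Hypothetical example: the zero-extension of 'scale' is loop-invariant and
// typically gets hoisted out of the loop, while the 64-bit multiply-add stays
// in the loop body. Both multiplicands are zero-extensions of 32-bit values,
// so the MADDXrrr in the loop body is a UMADDLrrr candidate, but ISel cannot
// prove that from the loop block alone.
uint64_t dot_scaled(const uint32_t *v, int n, uint32_t scale) {
  uint64_t acc = 0;
  uint64_t wide_scale = scale; // zext, usually hoisted to the preheader
  for (int i = 0; i < n; ++i)
    acc += wide_scale * (uint64_t)v[i]; // 64-bit madd of two zext'ed values
  return acc;
}
```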
The patch handles all patterns I have seen in real code, but please let me know if something isn't covered.
The patch was tested with LLVM's `ninja check` and the llvm-test-suite (run under QEMU user-mode emulation: `-DTEST_SUITE_USER_MODE_EMULATION=ON -DTEST_SUITE_RUN_UNDER='qemu-aarch64 -L /usr/aarch64-linux-gnu'`). Please let me know if more testing is needed.
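For reference, here is a minimal standalone check (my own sketch, not part of the patch or its test suite) of the arithmetic identity the rewrite relies on: if both 64-bit multiplicands are zero- (resp. sign-) extensions of 32-bit values, the 64-bit multiply-add equals the widening 32x32+64 UMADDL (resp. SMADDL) result:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Unsigned case (MADDX -> UMADDL): both operands are zero-extended 32-bit
  // values, so the low 64 bits of the 64x64 product equal the widening
  // 32x32->64 unsigned multiply.
  uint32_t ua = 0xfffffff0u, ub = 0x12345678u;
  uint64_t acc = 0xdeadbeefcafef00dULL;
  uint64_t za = ua, zb = ub; // zero-extended MADDX operands
  assert(acc + za * zb == acc + (uint64_t)ua * (uint64_t)ub);

  // Signed case (MADDX -> SMADDL): both operands are sign-extended 32-bit
  // values; the product fits in 64 bits, so no overflow occurs here.
  int32_t sa = -123456, sb = 305419896;
  int64_t sacc = -42;
  int64_t wa = sa, wb = sb; // sign-extended MADDX operands
  assert(sacc + wa * wb == sacc + (int64_t)sa * (int64_t)sb);
  return 0;
}
```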
For llvm-test-suite benchmarks (`-DTEST_SUITE_BENCHMARKING_ONLY=ON`), this replaced about 7.2% of 64-bit MADDs (3110 → 2886):
```
$ find llvm-test-suite/build-benchmarking-ref -name \*.o | xargs -n1 aarch64-linux-gnu-objdump -d | grep '\<madd\>.*x' | wc -l
3110
$ find llvm-test-suite/build-benchmarking-new -name \*.o | xargs -n1 aarch64-linux-gnu-objdump -d | grep '\<madd\>.*x' | wc -l
2886
```
Joint work of Yuri Gribov and Mikhail Semenov (@mermen).
---
Patch is 67.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135926.diff
6 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+216)
- (modified) llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll (+363-723)
- (modified) llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/fast-isel-gep.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/insertextract.ll (+2-2)
- (added) llvm/test/CodeGen/AArch64/madd-32bit.mir (+53)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddd17cee1344..e10c5a1c56a9e 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -67,11 +67,15 @@
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
// LSR or LSL alias of UBFM.
//
+// 10. Replace 64-bit MADD (MSUB) with [SU]MADDL ([SU]MSUBL) if both
+// multiplicand operands are extensions of 32-bit values.
+//
//===----------------------------------------------------------------------===//
#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -81,6 +85,15 @@ using namespace llvm;
namespace {
+// Information about 64-bit operand of MADD
+struct MADDOperandInfo {
+ Register SrcReg = 0;
+ // Whether operand is zero extension of SrcReg
+ bool IsZExt = false;
+ // Whether operand is sign extension of SrcReg
+ bool IsSExt = false;
+};
+
struct AArch64MIPeepholeOpt : public MachineFunctionPass {
static char ID;
@@ -137,6 +150,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitCopy(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
+ MADDOperandInfo analyzeMADDOperand(const MachineOperand &MO) const;
+ MADDOperandInfo
+ analyzeMADDOperand(const MachineOperand &MO,
+ SmallPtrSetImpl<const MachineInstr *> &VisitedPHIs) const;
+ MachineInstr *getUltimateDef(Register Reg) const;
+ bool visitMADD(MachineInstr &MI);
+
StringRef getPassName() const override {
return "AArch64 MI Peephole Optimization pass";
}
@@ -836,6 +856,198 @@ bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
return true;
}
+// Get the real definition of a register, looking through intermediate copies.
+MachineInstr *AArch64MIPeepholeOpt::getUltimateDef(Register Reg) const {
+  auto *MI = MRI->getUniqueVRegDef(Reg);
+  assert(MI && "Expected unique def of virtual register");
+  while (MI->isFullCopy() && MI->getOperand(1).getReg().isVirtual())
+    MI = MRI->getUniqueVRegDef(MI->getOperand(1).getReg());
+  return MI;
+}
+
+namespace {
+
+bool isSExtLoad(unsigned Opc) {
+ switch (Opc) {
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSBXpre:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHXpost:
+ case AArch64::LDRSHXpre:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRSWroW:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSWui:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSWi:
+ return true;
+ }
+ return false;
+}
+
+// Checks if bit 31 of the 32-bit result is known to be zero, so the value
+// can safely be treated as sign-extended as well as zero-extended
+bool isSExtInvariant(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ // Immediates with bit 31 == 0 can be safely sign-extended
+ return isUInt<31>(MI.getOperand(1).getImm());
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBBpre:
+ case AArch64::LDRBBroW:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHHpre:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ // Zero-extended 8/16-bit loads can be safely sign-extended
+ return true;
+ case AArch64::UBFMXri:
+ return MI.getOperand(3).getImm() >= MI.getOperand(2).getImm() &&
+ MI.getOperand(3).getImm() - MI.getOperand(2).getImm() <= 30;
+ default:
+ return false;
+ }
+}
+
+} // anonymous namespace
+
+MADDOperandInfo
+AArch64MIPeepholeOpt::analyzeMADDOperand(const MachineOperand &MO) const {
+ SmallPtrSet<const MachineInstr *, 2> VisitedPHIs;
+ return analyzeMADDOperand(MO, VisitedPHIs);
+}
+
+MADDOperandInfo AArch64MIPeepholeOpt::analyzeMADDOperand(
+ const MachineOperand &MO,
+ SmallPtrSetImpl<const MachineInstr *> &VisitedPHIs) const {
+ MADDOperandInfo Info;
+
+ if (MO.isImm()) {
+ auto Imm = AArch64_AM::decodeLogicalImmediate(MO.getImm(), 64);
+ Info.IsZExt = isUInt<32>(Imm);
+ Info.IsSExt = isInt<32>(Imm);
+ return Info;
+ }
+
+ if (!MO.isReg())
+ return Info;
+
+ auto Reg = MO.getReg();
+ auto *MI = getUltimateDef(Reg);
+
+ // Check if MI is an extension of 32-bit value
+
+ const auto Opc = MI->getOpcode();
+ if (MI->isFullCopy() && MI->getOperand(1).getReg() == AArch64::XZR) {
+ Info.IsZExt = Info.IsSExt = true;
+ } else if (MI->isSubregToReg() && MI->getOperand(1).getImm() == 0 &&
+ MI->getOperand(3).getImm() == AArch64::sub_32) {
+ Info.SrcReg = Reg;
+ Info.IsZExt = true;
+ Info.IsSExt = isSExtInvariant(*getUltimateDef(MI->getOperand(2).getReg()));
+ } else if (Opc == AArch64::MOVi64imm) {
+ auto Imm = MI->getOperand(1).getImm();
+ Info.SrcReg = Reg;
+ Info.IsZExt = isUInt<32>(Imm);
+ Info.IsSExt = isInt<32>(Imm);
+ } else if (Opc == AArch64::ANDXri &&
+ AArch64_AM::decodeLogicalImmediate(MI->getOperand(2).getImm(),
+ 64) <= UINT32_MAX) {
+ auto Imm =
+ AArch64_AM::decodeLogicalImmediate(MI->getOperand(2).getImm(), 64);
+ Info.SrcReg = Reg;
+ Info.IsZExt = true;
+ Info.IsSExt = isInt<32>(Imm);
+ } else if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) &&
+ MI->getOperand(3).getImm() >= MI->getOperand(2).getImm() &&
+ MI->getOperand(3).getImm() - MI->getOperand(2).getImm() <= 31) {
+ // TODO: support also [SU]BFIZ
+ // TODO: support also BFM (by checking base register)
+ Info.SrcReg = Reg;
+ if (Opc == AArch64::UBFMXri) {
+ Info.IsZExt = true;
+ Info.IsSExt =
+ MI->getOperand(3).getImm() - MI->getOperand(2).getImm() <= 30;
+ } else {
+ Info.IsSExt = true;
+ }
+ } else if (Opc == AArch64::CSELXr) {
+ auto NInfo = analyzeMADDOperand(MI->getOperand(1), VisitedPHIs);
+ auto MInfo = analyzeMADDOperand(MI->getOperand(2), VisitedPHIs);
+ Info.SrcReg = Reg;
+ Info.IsZExt = NInfo.IsZExt && MInfo.IsZExt;
+ Info.IsSExt = NInfo.IsSExt && MInfo.IsSExt;
+ } else if (isSExtLoad(Opc)) {
+ Info.SrcReg = Reg;
+ Info.IsSExt = true;
+ } else if (MI->isPHI()) {
+ Info.SrcReg = Reg;
+ Info.IsZExt = true;
+ Info.IsSExt = true;
+ if (!VisitedPHIs.insert(MI).second)
+ return Info;
+ for (unsigned I = 1, N = MI->getNumOperands(); I < N; I += 2) {
+ auto OpInfo = analyzeMADDOperand(MI->getOperand(I), VisitedPHIs);
+ Info.IsZExt &= OpInfo.IsZExt;
+ Info.IsSExt &= OpInfo.IsSExt;
+ }
+ }
+
+ return Info;
+}
+
+bool AArch64MIPeepholeOpt::visitMADD(MachineInstr &MI) {
+  // Try the following transformations (and their MSUBX -> [SU]MSUBL
+  // counterparts):
+  //   MADDX (32-bit zext) (32-bit zext) -> UMADDL
+  //   MADDX (32-bit sext) (32-bit sext) -> SMADDL
+
+ MADDOperandInfo Infos[] = {
+ analyzeMADDOperand(MI.getOperand(1)),
+ analyzeMADDOperand(MI.getOperand(2)),
+ };
+
+ unsigned Opc;
+
+ if (Infos[0].IsZExt && Infos[1].IsZExt) {
+ Opc = MI.getOpcode() == AArch64::MADDXrrr ? AArch64::UMADDLrrr
+ : AArch64::UMSUBLrrr;
+ } else if (Infos[0].IsSExt && Infos[1].IsSExt) {
+ Opc = MI.getOpcode() == AArch64::MADDXrrr ? AArch64::SMADDLrrr
+ : AArch64::SMSUBLrrr;
+ } else {
+ return false;
+ }
+
+ MI.setDesc(TII->get(Opc));
+
+ for (unsigned I = 0; I < std::size(Infos); ++I) {
+ const auto &Info = Infos[I];
+ auto &Op = MI.getOperand(I + 1);
+
+ Op.setReg(Info.SrcReg);
+
+    bool Is64Bit =
+        TRI->getRegSizeInBits(*MRI->getRegClass(Info.SrcReg)) == 64;
+    Op.setSubReg(Is64Bit ? AArch64::sub_32 : 0);
+ }
+
+ LLVM_DEBUG(dbgs() << "Updated: " << MI << "\n");
+
+ return true;
+}
+
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -927,6 +1139,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::COPY:
Changed |= visitCopy(MI);
break;
+ case AArch64::MADDXrrr:
+ case AArch64::MSUBXrrr:
+      Changed |= visitMADD(MI);
+ break;
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index c9864a357186d..ce398acff283a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -12,7 +12,7 @@ define i64 @umull(i64 %x0, i64 %x1) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: mov w9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
+; CHECK-GI-NEXT: umull x0, w9, w8
; CHECK-GI-NEXT: ret
entry:
%and = and i64 %x0, 4294967295
@@ -31,7 +31,7 @@ define i64 @umull2(i64 %x, i32 %y) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: mov w9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: umull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%and = and i64 %x, 4294967295
@@ -50,7 +50,7 @@ define i64 @umull2_commuted(i64 %x, i32 %y) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: mov w9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
+; CHECK-GI-NEXT: umull x0, w9, w8
; CHECK-GI-NEXT: ret
entry:
%and = and i64 %x, 4294967295
@@ -69,7 +69,7 @@ define i64 @smull(i64 %x0, i64 %x1) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
+; CHECK-GI-NEXT: smull x0, w9, w8
; CHECK-GI-NEXT: ret
entry:
%sext = shl i64 %x0, 32
@@ -91,7 +91,7 @@ define i64 @smull2(i64 %x, i32 %y) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: smull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%shl = shl i64 %x, 32
@@ -112,7 +112,7 @@ define i64 @smull2_commuted(i64 %x, i32 %y) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
+; CHECK-GI-NEXT: smull x0, w9, w8
; CHECK-GI-NEXT: ret
entry:
%shl = shl i64 %x, 32
@@ -123,21 +123,13 @@ entry:
}
define i64 @smull_ldrsb_b(ptr %x0, i8 %x1) {
-; CHECK-SD-LABEL: smull_ldrsb_b:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsb x8, [x0]
-; CHECK-SD-NEXT: sxtb x9, w1
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsb_b:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsb x8, [x0]
-; CHECK-GI-NEXT: sxtb x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsb_b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
entry:
%ext64 = load i8, ptr %x0
%sext = sext i8 %ext64 to i64
@@ -147,21 +139,13 @@ entry:
}
define i64 @smull_ldrsb_b_commuted(ptr %x0, i8 %x1) {
-; CHECK-SD-LABEL: smull_ldrsb_b_commuted:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsb x8, [x0]
-; CHECK-SD-NEXT: sxtb x9, w1
-; CHECK-SD-NEXT: smull x0, w9, w8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsb_b_commuted:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsb x8, [x0]
-; CHECK-GI-NEXT: sxtb x9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsb_b_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w9, w8
+; CHECK-NEXT: ret
entry:
%ext64 = load i8, ptr %x0
%sext = sext i8 %ext64 to i64
@@ -171,21 +155,13 @@ entry:
}
define i64 @smull_ldrsb_h(ptr %x0, i16 %x1) {
-; CHECK-SD-LABEL: smull_ldrsb_h:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsb x8, [x0]
-; CHECK-SD-NEXT: sxth x9, w1
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsb_h:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsb x8, [x0]
-; CHECK-GI-NEXT: sxth x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsb_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsb x8, [x0]
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
entry:
%ext64 = load i8, ptr %x0
%sext = sext i8 %ext64 to i64
@@ -206,7 +182,7 @@ define i64 @smull_ldrsb_w(ptr %x0, i32 %x1) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: ldrsb x8, [x0]
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: smull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%ext64 = load i8, ptr %x0
@@ -217,21 +193,13 @@ entry:
}
define i64 @smull_ldrsh_b(ptr %x0, i8 %x1) {
-; CHECK-SD-LABEL: smull_ldrsh_b:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsh x8, [x0]
-; CHECK-SD-NEXT: sxtb x9, w1
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsh_b:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsh x8, [x0]
-; CHECK-GI-NEXT: sxtb x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsh_b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
entry:
%ext64 = load i16, ptr %x0
%sext = sext i16 %ext64 to i64
@@ -241,21 +209,13 @@ entry:
}
define i64 @smull_ldrsh_h(ptr %x0, i16 %x1) {
-; CHECK-SD-LABEL: smull_ldrsh_h:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsh x8, [x0]
-; CHECK-SD-NEXT: sxth x9, w1
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsh_h:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsh x8, [x0]
-; CHECK-GI-NEXT: sxth x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsh_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
entry:
%ext64 = load i16, ptr %x0
%sext = sext i16 %ext64 to i64
@@ -265,21 +225,13 @@ entry:
}
define i64 @smull_ldrsh_h_commuted(ptr %x0, i16 %x1) {
-; CHECK-SD-LABEL: smull_ldrsh_h_commuted:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsh x8, [x0]
-; CHECK-SD-NEXT: sxth x9, w1
-; CHECK-SD-NEXT: smull x0, w9, w8
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsh_h_commuted:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsh x8, [x0]
-; CHECK-GI-NEXT: sxth x9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsh_h_commuted:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsh x8, [x0]
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w9, w8
+; CHECK-NEXT: ret
entry:
%ext64 = load i16, ptr %x0
%sext = sext i16 %ext64 to i64
@@ -300,7 +252,7 @@ define i64 @smull_ldrsh_w(ptr %x0, i32 %x1) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: ldrsh x8, [x0]
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: smull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%ext64 = load i16, ptr %x0
@@ -311,21 +263,13 @@ entry:
}
define i64 @smull_ldrsw_b(ptr %x0, i8 %x1) {
-; CHECK-SD-LABEL: smull_ldrsw_b:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsw x8, [x0]
-; CHECK-SD-NEXT: sxtb x9, w1
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsw_b:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsw x8, [x0]
-; CHECK-GI-NEXT: sxtb x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsw_b:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: sxtb x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
%sext = sext i32 %ext64 to i64
@@ -335,21 +279,13 @@ entry:
}
define i64 @smull_ldrsw_h(ptr %x0, i16 %x1) {
-; CHECK-SD-LABEL: smull_ldrsw_h:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: ldrsw x8, [x0]
-; CHECK-SD-NEXT: sxth x9, w1
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsw_h:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: ldrsw x8, [x0]
-; CHECK-GI-NEXT: sxth x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: smull_ldrsw_h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldrsw x8, [x0]
+; CHECK-NEXT: sxth x9, w1
+; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
%sext = sext i32 %ext64 to i64
@@ -370,7 +306,7 @@ define i64 @smull_ldrsw_w(ptr %x0, i32 %x1) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: ldrsw x8, [x0]
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: smull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
@@ -392,7 +328,7 @@ define i64 @smull_ldrsw_w_commuted(ptr %x0, i32 %x1) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: ldrsw x8, [x0]
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x9, x8
+; CHECK-GI-NEXT: smull x0, w9, w8
; CHECK-GI-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
@@ -418,7 +354,7 @@ define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-GI-NEXT: sxtb x8, w0
; CHECK-GI-NEXT: sxtb x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: smull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%sext = sext i8 %x0 to i64
@@ -438,7 +374,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldrsw x8, [x0]
; CHECK-GI-NEXT: sxtw x9, w1
-; CHECK-GI-NEXT: mul x0, x8, x9
+; CHECK-GI-NEXT: smull x0, w8, w9
; CHECK-GI-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
@@ -465,21 +401,13 @@ entry:
}
define i64 @smull_ldrsw_zexth(ptr %x0, i16 %x1) {
-; CHECK-SD-LABEL: smull_ldrsw_zexth:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldrsw x8, [x0]
-; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-SD-NEXT: and x9, x1, #0xffff
-; CHECK-SD-NEXT: smull x0, w8, w9
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: smull_ldrsw_zexth:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldrsw x8, [x0]
-; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT: a...
[truncated]
``````````
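To illustrate the CSELXr/PHI handling in `analyzeMADDOperand` above, here is a hypothetical source pattern (again my own example, not from the patch's tests) where the multiplicand is only known to be a 32-bit zero-extension after looking through a select:

```cpp
#include <cstdint>

// Hypothetical example for the CSEL/PHI cases: 'm' is selected between two
// values that are both zero-extensions of 32-bit quantities, so the selected
// value is still zero-extended and the 64-bit multiply-add below remains a
// UMADDL candidate.
uint64_t select_madd(uint64_t acc, uint32_t a, uint32_t b, uint64_t x,
                     bool cond) {
  uint64_t m = cond ? (uint64_t)a : (uint64_t)b; // may lower to a 64-bit
                                                 // select of zero-extensions
  return acc + m * (x & 0xffffffffu);            // both multiplicands zext'ed
}
```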
https://github.com/llvm/llvm-project/pull/135926
More information about the llvm-commits mailing list