[llvm] b702276 - [AArch64] Add Machine InstCombiner patterns for FMUL indexed variant
Andrew Savonichev via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 9 04:31:06 PST 2021
Author: Andrew Savonichev
Date: 2021-11-09T15:30:19+03:00
New Revision: b702276ad0d6c750740f69979ea4ac30469e2110
URL: https://github.com/llvm/llvm-project/commit/b702276ad0d6c750740f69979ea4ac30469e2110
DIFF: https://github.com/llvm/llvm-project/commit/b702276ad0d6c750740f69979ea4ac30469e2110.diff
LOG: [AArch64] Add Machine InstCombiner patterns for FMUL indexed variant
This patch adds a DUP+FMUL => FMUL_indexed pattern to the machine combiner.
FMUL_indexed is normally selected during instruction selection, but
that does not happen when the DUP and the FMUL are in different basic
blocks.
Differential Revision: https://reviews.llvm.org/D99662
Added:
llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
Modified:
llvm/include/llvm/CodeGen/MachineCombinerPattern.h
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index ac0cc70744d1..67544779f34c 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -153,7 +153,18 @@ enum class MachineCombinerPattern {
FMLSv4f32_OP1,
FMLSv4f32_OP2,
FMLSv4i32_indexed_OP1,
- FMLSv4i32_indexed_OP2
+ FMLSv4i32_indexed_OP2,
+
+ FMULv2i32_indexed_OP1,
+ FMULv2i32_indexed_OP2,
+ FMULv2i64_indexed_OP1,
+ FMULv2i64_indexed_OP2,
+ FMULv4i16_indexed_OP1,
+ FMULv4i16_indexed_OP2,
+ FMULv4i32_indexed_OP1,
+ FMULv4i32_indexed_OP2,
+ FMULv8i16_indexed_OP1,
+ FMULv8i16_indexed_OP2,
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c79c19b2fbeb..378ec59b3ae6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4917,6 +4917,55 @@ static bool getFMAPatterns(MachineInstr &Root,
return Found;
}
+/// Find DUP+FMUL => FMUL_indexed combiner patterns rooted at \p Root.
+///
+/// For each vector FMUL opcode handled below, source operands 1 and 2 of
+/// \p Root are inspected in turn. An operand qualifies when it is a virtual
+/// register whose unique definition is the lane DUP paired with that FMUL
+/// opcode; the matching *_indexed_OP1 / *_indexed_OP2 pattern is then
+/// appended to \p Patterns (OP1 before OP2 when both qualify).
+///
+/// \returns true if at least one pattern was appended.
+static bool getFMULPatterns(MachineInstr &Root,
+                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+  using MCP = MachineCombinerPattern;
+
+  // True when operand OpIdx of Root is a virtual register whose unique
+  // definition has opcode DupOpc.
+  auto IsFedByDup = [&Root](unsigned DupOpc, unsigned OpIdx) -> bool {
+    MachineOperand &Src = Root.getOperand(OpIdx);
+    if (!Src.isReg() || !Register::isVirtualRegister(Src.getReg()))
+      return false;
+    MachineRegisterInfo &RegInfo = Root.getParent()->getParent()->getRegInfo();
+    MachineInstr *Def = RegInfo.getUniqueVRegDef(Src.getReg());
+    return Def && Def->getOpcode() == DupOpc;
+  };
+
+  // Appends the pattern for every operand (1, then 2) that is fed by DupOpc
+  // and reports whether anything was appended.
+  auto Collect = [&](unsigned DupOpc, MCP ForOp1, MCP ForOp2) -> bool {
+    bool Matched = false;
+    if (IsFedByDup(DupOpc, 1)) {
+      Patterns.push_back(ForOp1);
+      Matched = true;
+    }
+    if (IsFedByDup(DupOpc, 2)) {
+      Patterns.push_back(ForOp2);
+      Matched = true;
+    }
+    return Matched;
+  };
+
+  switch (Root.getOpcode()) {
+  case AArch64::FMULv2f32:
+    return Collect(AArch64::DUPv2i32lane, MCP::FMULv2i32_indexed_OP1,
+                   MCP::FMULv2i32_indexed_OP2);
+  case AArch64::FMULv2f64:
+    return Collect(AArch64::DUPv2i64lane, MCP::FMULv2i64_indexed_OP1,
+                   MCP::FMULv2i64_indexed_OP2);
+  case AArch64::FMULv4f16:
+    return Collect(AArch64::DUPv4i16lane, MCP::FMULv4i16_indexed_OP1,
+                   MCP::FMULv4i16_indexed_OP2);
+  case AArch64::FMULv4f32:
+    return Collect(AArch64::DUPv4i32lane, MCP::FMULv4i32_indexed_OP1,
+                   MCP::FMULv4i32_indexed_OP2);
+  case AArch64::FMULv8f16:
+    return Collect(AArch64::DUPv8i16lane, MCP::FMULv8i16_indexed_OP1,
+                   MCP::FMULv8i16_indexed_OP2);
+  default:
+    // Not a vector FMUL this combine knows about.
+    return false;
+  }
+}
+
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
@@ -4980,6 +5029,16 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4f32_OP2:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2:
case MachineCombinerPattern::MULADDv8i8_OP1:
case MachineCombinerPattern::MULADDv8i8_OP2:
case MachineCombinerPattern::MULADDv16i8_OP1:
@@ -5036,6 +5095,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMaddPatterns(Root, Patterns))
return true;
// Floating point patterns
+ if (getFMULPatterns(Root, Patterns))
+ return true;
if (getFMAPatterns(Root, Patterns))
return true;
@@ -5124,6 +5185,42 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane).
+///
+/// Builds the indexed multiply and queues it in \p InsInstrs; \p Root itself
+/// is left untouched here.
+///
+/// \param Root      the vector FMUL whose operand \p IdxDupOp is a lane DUP
+/// \param InsInstrs receives the newly built indexed FMUL
+/// \param IdxDupOp  index (1 or 2) of the \p Root operand defined by the DUP
+/// \param MulOpc    opcode of the indexed FMUL to build
+/// \param RC        register class the DUP source register is constrained to
+/// \param MRI       register info used to clear kill flags and constrain the
+///                  DUP source register
+/// \returns the address of \p Root.
+static MachineInstr *
+genIndexedMultiply(MachineInstr &Root,
+                   SmallVectorImpl<MachineInstr *> &InsInstrs,
+                   unsigned IdxDupOp, unsigned MulOpc,
+                   const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
+  assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
+         "Invalid index of FMUL operand");
+
+  MachineFunction &MF = *Root.getMF();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+  MachineInstr *Dup =
+      MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
+  // getUniqueVRegDef yields null unless the register has exactly one def;
+  // make the dependency on that explicit before dereferencing.
+  assert(Dup && "FMUL operand is expected to have a unique defining DUP");
+
+  // The DUP source register gains a new use in the indexed FMUL, so kill
+  // flags on its existing uses can no longer be trusted.
+  Register DupSrcReg = Dup->getOperand(1).getReg();
+  MRI.clearKillFlags(DupSrcReg);
+  MRI.constrainRegClass(DupSrcReg, RC);
+
+  unsigned DupSrcLane = Dup->getOperand(2).getImm();
+
+  // The FMUL operand that is not the DUP result is forwarded unchanged.
+  unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
+  MachineOperand &MulOp = Root.getOperand(IdxMulOp);
+
+  Register ResultReg = Root.getOperand(0).getReg();
+
+  MachineInstrBuilder MIB =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
+          .add(MulOp)
+          .addReg(DupSrcReg)
+          .addImm(DupSrcLane);
+
+  InsInstrs.push_back(MIB);
+  return &Root;
+}
+
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
@@ -6082,12 +6179,53 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
// FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
// CodeGen/AArch64/urem-seteq-nonzero.ll.
// assert(MUL && "MUL was never set");
- DelInstrs.push_back(MUL);
+ if (MUL)
+ DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
index 95ef0f90d231..d83da9db44b6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fma-combines.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 -enable-unsafe-fp-math -verify-machineinstrs | FileCheck %s
+
define void @foo_2d(double* %src) {
; CHECK-LABEL: %entry
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
@@ -134,3 +135,128 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
+
+; <2 x float> case: the lane-0 splat of %shuf is built in %entry, while the
+; fmul/fadd that consume it sit in the %for.body loop. FileCheck expects an
+; indexed fmla.2s (lane [0]) after the for.body label.
+define void @indexed_2s(<2 x float> %shuf, <2 x float> %add,
+ <2 x float>* %pmul, <2 x float>* %pret) {
+; CHECK-LABEL: %entry
+; CHECK: for.body
+; CHECK: fmla.2s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+;
+entry:
+ %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+ %pmul_i = getelementptr inbounds <2 x float>, <2 x float>* %pmul, i64 %i
+ %pret_i = getelementptr inbounds <2 x float>, <2 x float>* %pret, i64 %i
+
+ %mul_i = load <2 x float>, <2 x float>* %pmul_i
+
+ %mul = fmul fast <2 x float> %mul_i, %shuffle
+ %muladd = fadd fast <2 x float> %mul, %add
+
+ store <2 x float> %muladd, <2 x float>* %pret_i, align 16
+ %inext = add i64 %i, 1
+ br label %for.body
+}
+
+; <2 x double> case: the lane-0 splat of %shuf is built in %entry, while the
+; fmul/fadd that consume it sit in the %for.body loop. FileCheck expects an
+; indexed fmla.2d (lane [0]) after the for.body label.
+define void @indexed_2d(<2 x double> %shuf, <2 x double> %add,
+ <2 x double>* %pmul, <2 x double>* %pret) {
+; CHECK-LABEL: %entry
+; CHECK: for.body
+; CHECK: fmla.2d {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+;
+entry:
+ %shuffle = shufflevector <2 x double> %shuf, <2 x double> undef, <2 x i32> zeroinitializer
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+ %pmul_i = getelementptr inbounds <2 x double>, <2 x double>* %pmul, i64 %i
+ %pret_i = getelementptr inbounds <2 x double>, <2 x double>* %pret, i64 %i
+
+ %mul_i = load <2 x double>, <2 x double>* %pmul_i
+
+ %mul = fmul fast <2 x double> %mul_i, %shuffle
+ %muladd = fadd fast <2 x double> %mul, %add
+
+ store <2 x double> %muladd, <2 x double>* %pret_i, align 16
+ %inext = add i64 %i, 1
+ br label %for.body
+}
+
+; <4 x float> case: the lane-0 splat of %shuf is built in %entry, while the
+; fmul/fadd that consume it sit in the %for.body loop. FileCheck expects an
+; indexed fmla.4s (lane [0]) after the for.body label.
+define void @indexed_4s(<4 x float> %shuf, <4 x float> %add,
+ <4 x float>* %pmul, <4 x float>* %pret) {
+; CHECK-LABEL: %entry
+; CHECK: for.body
+; CHECK: fmla.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+;
+entry:
+ %shuffle = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> zeroinitializer
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+ %pmul_i = getelementptr inbounds <4 x float>, <4 x float>* %pmul, i64 %i
+ %pret_i = getelementptr inbounds <4 x float>, <4 x float>* %pret, i64 %i
+
+ %mul_i = load <4 x float>, <4 x float>* %pmul_i
+
+ %mul = fmul fast <4 x float> %mul_i, %shuffle
+ %muladd = fadd fast <4 x float> %mul, %add
+
+ store <4 x float> %muladd, <4 x float>* %pret_i, align 16
+ %inext = add i64 %i, 1
+ br label %for.body
+}
+
+; <4 x half> case: the lane-0 splat of %shuf is built in %entry, while the
+; fmul/fadd that consume it sit in the %for.body loop. FileCheck expects an
+; indexed fmla.4h (lane [0]) after the for.body label.
+define void @indexed_4h(<4 x half> %shuf, <4 x half> %add,
+ <4 x half>* %pmul, <4 x half>* %pret) {
+; CHECK-LABEL: %entry
+; CHECK: for.body
+; CHECK: fmla.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+;
+entry:
+ %shuffle = shufflevector <4 x half> %shuf, <4 x half> undef, <4 x i32> zeroinitializer
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+ %pmul_i = getelementptr inbounds <4 x half>, <4 x half>* %pmul, i64 %i
+ %pret_i = getelementptr inbounds <4 x half>, <4 x half>* %pret, i64 %i
+
+ %mul_i = load <4 x half>, <4 x half>* %pmul_i
+
+ %mul = fmul fast <4 x half> %mul_i, %shuffle
+ %muladd = fadd fast <4 x half> %mul, %add
+
+ store <4 x half> %muladd, <4 x half>* %pret_i, align 16
+ %inext = add i64 %i, 1
+ br label %for.body
+}
+
+; <8 x half> case: the lane-0 splat of %shuf is built in %entry, while the
+; fmul/fadd that consume it sit in the %for.body loop. FileCheck expects an
+; indexed fmla.8h (lane [0]) after the for.body label.
+define void @indexed_8h(<8 x half> %shuf, <8 x half> %add,
+ <8 x half>* %pmul, <8 x half>* %pret) {
+; CHECK-LABEL: %entry
+; CHECK: for.body
+; CHECK: fmla.8h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}[0]
+;
+entry:
+ %shuffle = shufflevector <8 x half> %shuf, <8 x half> undef, <8 x i32> zeroinitializer
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+ %pmul_i = getelementptr inbounds <8 x half>, <8 x half>* %pmul, i64 %i
+ %pret_i = getelementptr inbounds <8 x half>, <8 x half>* %pret, i64 %i
+
+ %mul_i = load <8 x half>, <8 x half>* %pmul_i
+
+ %mul = fmul fast <8 x half> %mul_i, %shuffle
+ %muladd = fadd fast <8 x half> %mul, %add
+
+ store <8 x half> %muladd, <8 x half>* %pret_i, align 16
+ %inext = add i64 %i, 1
+ br label %for.body
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
new file mode 100644
index 000000000000..4de93fca36f1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir
@@ -0,0 +1,547 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=machine-combiner -o - -simplify-mir -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -verify-machineinstrs %s | FileCheck %s
+--- |
+ ; ModuleID = 'lit.ll'
+ source_filename = "lit.ll"
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ define void @indexed_2s(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 {
+ entry:
+ %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer
+ br label %for.cond
+
+ for.cond: ; preds = %for.cond, %entry
+ %mul = fmul <2 x float> %mu, %shuffle
+ %add = fadd <2 x float> %mul, %ad
+ store <2 x float> %add, <2 x float>* %ret, align 16
+ br label %for.cond
+ }
+
+ define void @indexed_2s_rev(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad, <2 x float>* %ret) #0 {
+ entry:
+ %shuffle = shufflevector <2 x float> %shuf, <2 x float> undef, <2 x i32> zeroinitializer
+ br label %for.cond
+
+ for.cond: ; preds = %for.cond, %entry
+ %mul = fmul <2 x float> %shuffle, %mu
+ %add = fadd <2 x float> %mul, %ad
+ store <2 x float> %add, <2 x float>* %ret, align 16
+ br label %for.cond
+ }
+
+ define void @indexed_2d(<2 x double> %shuf, <2 x double> %mu, <2 x double> %ad, <2 x double>* %ret) #0 {
+ entry:
+ %shuffle = shufflevector <2 x double> %shuf, <2 x double> undef, <2 x i32> zeroinitializer
+ br label %for.cond
+
+ for.cond: ; preds = %for.cond, %entry
+ %mul = fmul <2 x double> %mu, %shuffle
+ %add = fadd <2 x double> %mul, %ad
+ store <2 x double> %add, <2 x double>* %ret, align 16
+ br label %for.cond
+ }
+
+ define void @indexed_4s(<4 x float> %shuf, <4 x float> %mu, <4 x float> %ad, <4 x float>* %ret) #0 {
+ entry:
+ %shuffle = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> zeroinitializer
+ br label %for.cond
+
+ for.cond: ; preds = %for.cond, %entry
+ %mul = fmul <4 x float> %mu, %shuffle
+ %add = fadd <4 x float> %mul, %ad
+ store <4 x float> %add, <4 x float>* %ret, align 16
+ br label %for.cond
+ }
+
+ define void @indexed_4h(<4 x half> %shuf, <4 x half> %mu, <4 x half> %ad, <4 x half>* %ret) #0 {
+ entry:
+ %shuffle = shufflevector <4 x half> %shuf, <4 x half> undef, <4 x i32> zeroinitializer
+ br label %for.cond
+
+ for.cond:
+ %mul = fmul <4 x half> %mu, %shuffle
+ %add = fadd <4 x half> %mul, %ad
+ store <4 x half> %add, <4 x half>* %ret, align 16
+ br label %for.cond
+ }
+
+ define void @indexed_8h(<8 x half> %shuf, <8 x half> %mu, <8 x half> %ad, <8 x half>* %ret) #0 {
+ entry:
+ %shuffle = shufflevector <8 x half> %shuf, <8 x half> undef, <8 x i32> zeroinitializer
+ br label %for.cond
+
+ for.cond:
+ %mul = fmul <8 x half> %mu, %shuffle
+ %add = fadd <8 x half> %mul, %ad
+ store <8 x half> %add, <8 x half>* %ret, align 16
+ br label %for.cond
+ }
+
+ define void @kill_state(<2 x float> %shuf, <2 x float> %mu, <2 x float> %ad,
+ <2 x float>* %ret, <2 x float>* %ret2, float %f) #0 {
+ entry:
+ %zero_elem = extractelement <2 x float> %shuf, i32 0
+ %ins = insertelement <2 x float> undef, float %zero_elem, i32 0
+ %shuffle = shufflevector <2 x float> %ins, <2 x float> undef, <2 x i32> zeroinitializer
+ %ins2 = insertelement <2 x float> %ins, float %f, i32 1
+ store <2 x float> %ins2, <2 x float>* %ret2, align 8
+ br label %for.cond
+
+ for.cond: ; preds = %for.cond, %entry
+ %mul = fmul <2 x float> %mu, %shuffle
+ %add = fadd <2 x float> %mul, %ad
+ store <2 x float> %add, <2 x float>* %ret, align 16
+ br label %for.cond
+ }
+
+ attributes #0 = { "target-cpu"="cortex-a57" }
+
+...
+---
+name: indexed_2s
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr64 }
+ - { id: 1, class: fpr64 }
+ - { id: 2, class: fpr64 }
+ - { id: 3, class: fpr64 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr64 }
+ - { id: 6, class: fpr64 }
+ - { id: 7, class: fpr128 }
+ - { id: 8, class: fpr128 }
+ - { id: 9, class: fpr64 }
+ - { id: 10, class: fpr64 }
+liveins:
+ - { reg: '$d0', virtual-reg: '%1' }
+ - { reg: '$d1', virtual-reg: '%2' }
+ - { reg: '$d2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: indexed_2s
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $d0, $d1, $d2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]]
+ ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64) into %ir.ret, align 16)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $d0, $d1, $d2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr64 = COPY $d2
+ %2:fpr64 = COPY $d1
+ %1:fpr64 = COPY $d0
+ %8:fpr128 = IMPLICIT_DEF
+ %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub
+ %6:fpr64 = COPY %3
+ %5:fpr64 = COPY %2
+ %0:fpr64 = DUPv2i32lane killed %7, 0
+
+ bb.1.for.cond:
+ %9:fpr64 = FMULv2f32 %5, %0
+ %10:fpr64 = FADDv2f32 killed %9, %6
+ STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16)
+ B %bb.1
+
+...
+---
+name: indexed_2s_rev
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr64 }
+ - { id: 1, class: fpr64 }
+ - { id: 2, class: fpr64 }
+ - { id: 3, class: fpr64 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr64 }
+ - { id: 6, class: fpr64 }
+ - { id: 7, class: fpr128 }
+ - { id: 8, class: fpr128 }
+ - { id: 9, class: fpr64 }
+ - { id: 10, class: fpr64 }
+liveins:
+ - { reg: '$d0', virtual-reg: '%1' }
+ - { reg: '$d1', virtual-reg: '%2' }
+ - { reg: '$d2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: indexed_2s_rev
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $d0, $d1, $d2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]]
+ ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64) into %ir.ret, align 16)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $d0, $d1, $d2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr64 = COPY $d2
+ %2:fpr64 = COPY $d1
+ %1:fpr64 = COPY $d0
+ %8:fpr128 = IMPLICIT_DEF
+ %7:fpr128 = INSERT_SUBREG %8, %1, %subreg.dsub
+ %6:fpr64 = COPY %3
+ %5:fpr64 = COPY %2
+ %0:fpr64 = DUPv2i32lane killed %7, 0
+
+ bb.1.for.cond:
+ %9:fpr64 = FMULv2f32 %0, %5
+ %10:fpr64 = FADDv2f32 killed %9, %6
+ STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16)
+ B %bb.1
+
+...
+---
+name: indexed_2d
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr128 }
+ - { id: 1, class: fpr128 }
+ - { id: 2, class: fpr128 }
+ - { id: 3, class: fpr128 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr128 }
+ - { id: 6, class: fpr128 }
+ - { id: 7, class: fpr128 }
+ - { id: 8, class: fpr128 }
+liveins:
+ - { reg: '$q0', virtual-reg: '%1' }
+ - { reg: '$q1', virtual-reg: '%2' }
+ - { reg: '$q2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: indexed_2d
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $q0, $q1, $q2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr128 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr128 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY3]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv2i64_indexed:%[0-9]+]]:fpr128 = FMULv2i64_indexed [[COPY5]], [[COPY3]], 0
+ ; CHECK-NEXT: [[FADDv2f64_:%[0-9]+]]:fpr128 = FADDv2f64 killed [[FMULv2i64_indexed]], [[COPY4]]
+ ; CHECK-NEXT: STRQui killed [[FADDv2f64_]], [[COPY]], 0 :: (store (s128) into %ir.ret)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $q0, $q1, $q2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr128 = COPY $q2
+ %2:fpr128 = COPY $q1
+ %1:fpr128 = COPY $q0
+ %6:fpr128 = COPY %3
+ %5:fpr128 = COPY %2
+ %0:fpr128 = DUPv2i64lane %1, 0
+
+ bb.1.for.cond:
+ %7:fpr128 = FMULv2f64 %5, %0
+ %8:fpr128 = FADDv2f64 killed %7, %6
+ STRQui killed %8, %4, 0 :: (store 16 into %ir.ret)
+ B %bb.1
+
+...
+---
+name: indexed_4s
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr128 }
+ - { id: 1, class: fpr128 }
+ - { id: 2, class: fpr128 }
+ - { id: 3, class: fpr128 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr128 }
+ - { id: 6, class: fpr128 }
+ - { id: 7, class: fpr128 }
+ - { id: 8, class: fpr128 }
+liveins:
+ - { reg: '$q0', virtual-reg: '%1' }
+ - { reg: '$q1', virtual-reg: '%2' }
+ - { reg: '$q2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: indexed_4s
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $q0, $q1, $q2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr128 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr128 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY3]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv4i32_indexed:%[0-9]+]]:fpr128 = FMULv4i32_indexed [[COPY5]], [[COPY3]], 0
+ ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = FADDv4f32 killed [[FMULv4i32_indexed]], [[COPY4]]
+ ; CHECK-NEXT: STRQui killed [[FADDv4f32_]], [[COPY]], 0 :: (store (s128) into %ir.ret)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $q0, $q1, $q2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr128 = COPY $q2
+ %2:fpr128 = COPY $q1
+ %1:fpr128 = COPY $q0
+ %6:fpr128 = COPY %3
+ %5:fpr128 = COPY %2
+ %0:fpr128 = DUPv4i32lane %1, 0
+
+ bb.1.for.cond:
+ %7:fpr128 = FMULv4f32 %5, %0
+ %8:fpr128 = FADDv4f32 killed %7, %6
+ STRQui killed %8, %4, 0 :: (store 16 into %ir.ret)
+ B %bb.1
+
+...
+---
+name: indexed_4h
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr64 }
+ - { id: 1, class: fpr64 }
+ - { id: 2, class: fpr64 }
+ - { id: 3, class: fpr64 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr128 }
+ - { id: 6, class: fpr128 }
+ - { id: 7, class: fpr64 }
+ - { id: 8, class: fpr64 }
+liveins:
+ - { reg: '$d0', virtual-reg: '%1' }
+ - { reg: '$d1', virtual-reg: '%2' }
+ - { reg: '$d2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: indexed_4h
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $d0, $d1, $d2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128_lo = INSERT_SUBREG [[DEF]], [[COPY3]], %subreg.dsub
+ ; CHECK-NEXT: [[DUPv4i16lane:%[0-9]+]]:fpr64 = DUPv4i16lane [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv4i16_indexed:%[0-9]+]]:fpr64 = FMULv4i16_indexed [[COPY2]], [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: [[FADDv4f16_:%[0-9]+]]:fpr64 = FADDv4f16 killed [[FMULv4i16_indexed]], [[COPY1]]
+ ; CHECK-NEXT: STRDui killed [[FADDv4f16_]], [[COPY]], 0 :: (store (s64) into %ir.ret, align 16)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $d0, $d1, $d2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr64 = COPY $d2
+ %2:fpr64 = COPY $d1
+ %1:fpr64 = COPY $d0
+ %6:fpr128 = IMPLICIT_DEF
+ %5:fpr128 = INSERT_SUBREG %6, %1, %subreg.dsub
+ %0:fpr64 = DUPv4i16lane killed %5, 0
+
+ bb.1.for.cond:
+ %7:fpr64 = FMULv4f16 %2, %0
+ %8:fpr64 = FADDv4f16 killed %7, %3
+ STRDui killed %8, %4, 0 :: (store 8 into %ir.ret, align 16)
+ B %bb.1
+
+...
+---
+name: indexed_8h
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr128 }
+ - { id: 1, class: fpr128 }
+ - { id: 2, class: fpr128 }
+ - { id: 3, class: fpr128 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: fpr128 }
+ - { id: 6, class: fpr128 }
+liveins:
+ - { reg: '$q0', virtual-reg: '%1' }
+ - { reg: '$q1', virtual-reg: '%2' }
+ - { reg: '$q2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: indexed_8h
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $q0, $q1, $q2, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr128 = COPY $q1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr128_lo = COPY $q0
+ ; CHECK-NEXT: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY3]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv8i16_indexed:%[0-9]+]]:fpr128 = FMULv8i16_indexed [[COPY2]], [[COPY3]], 0
+ ; CHECK-NEXT: [[FADDv8f16_:%[0-9]+]]:fpr128 = FADDv8f16 killed [[FMULv8i16_indexed]], [[COPY1]]
+ ; CHECK-NEXT: STRQui killed [[FADDv8f16_]], [[COPY]], 0 :: (store (s128) into %ir.ret)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $q0, $q1, $q2, $x0
+
+ %4:gpr64common = COPY $x0
+ %3:fpr128 = COPY $q2
+ %2:fpr128 = COPY $q1
+ %1:fpr128 = COPY $q0
+ %0:fpr128 = DUPv8i16lane %1, 0
+
+ bb.1.for.cond:
+ %5:fpr128 = FMULv8f16 %2, %0
+ %6:fpr128 = FADDv8f16 killed %5, %3
+ STRQui killed %6, %4, 0 :: (store 16 into %ir.ret)
+ B %bb.1
+
+...
+---
+name: kill_state
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr64 }
+ - { id: 1, class: fpr64 }
+ - { id: 2, class: fpr64 }
+ - { id: 3, class: fpr64 }
+ - { id: 4, class: gpr64common }
+ - { id: 5, class: gpr64common }
+ - { id: 6, class: fpr32 }
+ - { id: 7, class: fpr64 }
+ - { id: 8, class: fpr64 }
+ - { id: 9, class: fpr128 }
+ - { id: 10, class: fpr128 }
+ - { id: 11, class: fpr128 }
+ - { id: 12, class: fpr128 }
+ - { id: 13, class: fpr128 }
+ - { id: 14, class: fpr64 }
+ - { id: 15, class: fpr64 }
+ - { id: 16, class: fpr64 }
+liveins:
+ - { reg: '$d0', virtual-reg: '%1' }
+ - { reg: '$d1', virtual-reg: '%2' }
+ - { reg: '$d2', virtual-reg: '%3' }
+ - { reg: '$x0', virtual-reg: '%4' }
+ - { reg: '$x1', virtual-reg: '%5' }
+ - { reg: '$s3', virtual-reg: '%6' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: kill_state
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $d0, $d1, $d2, $x0, $x1, $s3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d2
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY5]], %subreg.dsub
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:fpr64 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY]], %subreg.ssub
+ ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[INSERT_SUBREG]], 1, killed [[INSERT_SUBREG1]], 0
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:fpr64 = COPY [[INSvi32lane]].dsub
+ ; CHECK-NEXT: STRDui killed [[COPY8]], [[COPY1]], 0 :: (store (s64) into %ir.ret2)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond:
+ ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY7]], [[INSERT_SUBREG]], 0
+ ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY6]]
+ ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY2]], 0 :: (store (s64) into %ir.ret, align 16)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $d0, $d1, $d2, $x0, $x1, $s3
+
+ %6:fpr32 = COPY $s3
+ %5:gpr64common = COPY $x1
+ %4:gpr64common = COPY $x0
+ %3:fpr64 = COPY $d2
+ %2:fpr64 = COPY $d1
+ %1:fpr64 = COPY $d0
+ %10:fpr128 = IMPLICIT_DEF
+ %9:fpr128 = INSERT_SUBREG %10, %1, %subreg.dsub
+ %8:fpr64 = COPY %3
+ %7:fpr64 = COPY %2
+ %0:fpr64 = DUPv2i32lane %9, 0
+ %12:fpr128 = IMPLICIT_DEF
+ %11:fpr128 = INSERT_SUBREG %12, %6, %subreg.ssub
+ %13:fpr128 = INSvi32lane killed %9, 1, killed %11, 0
+ %14:fpr64 = COPY %13.dsub
+ STRDui killed %14, %5, 0 :: (store (s64) into %ir.ret2)
+
+ bb.1.for.cond:
+ %15:fpr64 = FMULv2f32 %7, %0
+ %16:fpr64 = FADDv2f32 killed %15, %8
+ STRDui killed %16, %4, 0 :: (store (s64) into %ir.ret, align 16)
+ B %bb.1
+
+...
More information about the llvm-commits
mailing list