[llvm] r274686 - AArch64: Change modeling of zero cycle zeroing.
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 6 14:39:34 PDT 2016
Author: matze
Date: Wed Jul 6 16:39:33 2016
New Revision: 274686
URL: http://llvm.org/viewvc/llvm-project?rev=274686&view=rev
Log:
AArch64: Change modeling of zero cycle zeroing.
On CPUs with the zero cycle zeroing feature enabled, "movi v.2d" should
be used to zero a vector register. This was previously done at
instruction selection time; however, the register coalescer sometimes
widened multiple vregs to the Q width because of it, leading to extra
spills. This patch leaves the decision on how to zero a register to the
AsmPrinter phase, where it no longer affects register allocation.
This patch also sets isAsCheapAsAMove=1 on FMOVS0 and FMOVD0.
This fixes http://llvm.org/PR27454, rdar://25866262
Differential Revision: http://reviews.llvm.org/D21826
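
As a minimal sketch (the function name @return_zero and the file name
zero.ll below are illustrative, not part of the patch): an IR function
that materializes a floating-point zero now always selects the FMOVS0
pseudo, and the AsmPrinter chooses the final instruction late, emitting
"fmov s0, wzr" without zero cycle zeroing, or a "movi v0.2d, #0"-style
zeroing (as checked in fp-cond-sel.ll) on a subtarget that has the
feature, Cyclone for example.

; Sketch only: returning 0.0 forces the backend to materialize fpimm0.
; Try: llc -mcpu=cyclone -o - zero.ll   (zero.ll is a hypothetical name)
define float @return_zero() {
entry:
  ret float 0.0
}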
Modified:
llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp?rev=274686&r1=274685&r2=274686&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64AsmPrinter.cpp Wed Jul 6 16:39:33 2016
@@ -49,6 +49,7 @@ namespace {
class AArch64AsmPrinter : public AsmPrinter {
AArch64MCInstLower MCInstLowering;
StackMaps SM;
+ const AArch64Subtarget *STI;
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -83,6 +84,7 @@ public:
bool runOnMachineFunction(MachineFunction &F) override {
AArch64FI = F.getInfo<AArch64FunctionInfo>();
+ STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
return AsmPrinter::runOnMachineFunction(F);
}
@@ -111,6 +113,9 @@ private:
/// \brief Emit the LOHs contained in AArch64FI.
void EmitLOHs();
+ /// Emit instruction to set float register to zero.
+ void EmitFMov0(const MachineInstr &MI);
+
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
};
@@ -224,8 +229,7 @@ bool AArch64AsmPrinter::printAsmRegInCla
const TargetRegisterClass *RC,
bool isVector, raw_ostream &O) {
assert(MO.isReg() && "Should only get here with a register!");
- const AArch64RegisterInfo *RI =
- MF->getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ const TargetRegisterInfo *RI = STI->getRegisterInfo();
unsigned Reg = MO.getReg();
unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
assert(RI->regsOverlap(RegToPrint, Reg));
@@ -416,6 +420,40 @@ void AArch64AsmPrinter::LowerPATCHPOINT(
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
}
+void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ if (STI->hasZeroCycleZeroing()) {
+ // Convert S/D register to corresponding Q register
+ if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
+ DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
+ } else {
+ assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
+ DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
+ }
+ MCInst MOVI;
+ MOVI.setOpcode(AArch64::MOVIv2d_ns);
+ MOVI.addOperand(MCOperand::createReg(DestReg));
+ MOVI.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, MOVI);
+ } else {
+ MCInst FMov;
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case AArch64::FMOVS0:
+ FMov.setOpcode(AArch64::FMOVWSr);
+ FMov.addOperand(MCOperand::createReg(DestReg));
+ FMov.addOperand(MCOperand::createReg(AArch64::WZR));
+ break;
+ case AArch64::FMOVD0:
+ FMov.setOpcode(AArch64::FMOVXDr);
+ FMov.addOperand(MCOperand::createReg(DestReg));
+ FMov.addOperand(MCOperand::createReg(AArch64::XZR));
+ break;
+ }
+ EmitToStreamer(*OutStreamer, FMov);
+ }
+}
+
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
@@ -521,6 +559,11 @@ void AArch64AsmPrinter::EmitInstruction(
return;
}
+ case AArch64::FMOVS0:
+ case AArch64::FMOVD0:
+ EmitFMov0(*MI);
+ return;
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*OutStreamer, SM, *MI);
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=274686&r1=274685&r2=274686&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Wed Jul 6 16:39:33 2016
@@ -301,9 +301,6 @@ def AArch64umaxv : SDNode<"AArch64ISD
//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
-//
-def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
-def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
def ForCodeSize : Predicate<"ForCodeSize">;
@@ -2565,15 +2562,11 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uin
defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
- PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
- Sched<[WriteF]>,
- Requires<[NoZCZ]>;
+ Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
- PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
- Sched<[WriteF]>,
- Requires<[NoZCZ]>;
+ Sched<[WriteF]>;
}
//===----------------------------------------------------------------------===//
@@ -4435,18 +4428,6 @@ def MOVIv2d_ns : SIMDModifiedImmVector
"movi", ".2d",
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
-
-// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
-// Complexity is added to break a tie with a plain MOVI.
-let AddedComplexity = 1 in {
-def : Pat<(f32 fpimm0),
- (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>,
- Requires<[HasZCZ]>;
-def : Pat<(f64 fpimm0),
- (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>,
- Requires<[HasZCZ]>;
-}
-
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll?rev=274686&r1=274685&r2=274686&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll Wed Jul 6 16:39:33 2016
@@ -47,3 +47,29 @@ declare void @bar(double, double, double
declare void @bari(i32, i32)
declare void @barl(i64, i64)
declare void @barf(float, float)
+
+; We used to produce spills+reloads for a Q register with zero cycle zeroing
+; enabled.
+; CHECK-LABEL: foo:
+; CHECK-NOT: str {{q[0-9]+}}
+; CHECK-NOT: ldr {{q[0-9]+}}
+define double @foo(i32 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %phi0 = phi double [ 1.0, %entry ], [ %v0, %for.body ]
+ %i.076 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %conv21 = sitofp i32 %i.076 to double
+ %call = tail call fast double @sin(double %conv21)
+ %cmp.i = fcmp fast olt double %phi0, %call
+ %v0 = select i1 %cmp.i, double %call, double %phi0
+ %inc = add nuw nsw i32 %i.076, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret double %v0
+}
+
+declare double @sin(double)
Modified: llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll?rev=274686&r1=274685&r2=274686&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp-cond-sel.ll Wed Jul 6 16:39:33 2016
@@ -12,8 +12,8 @@ define void @test_csel(i32 %lhs32, i32 %
%tst1 = icmp ugt i32 %lhs32, %rhs32
%val1 = select i1 %tst1, float 0.0, float 1.0
store float %val1, float* @varfloat
-; CHECK: movi v[[FLT0:[0-9]+]].2d, #0
-; CHECK: fmov s[[FLT1:[0-9]+]], #1.0
+; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0
+; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0
; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi
%rhs64 = sext i32 %rhs32 to i64