[llvm] r276987 - [X86] Remove CustomInserter for FMA3 instructions. Looks like since we got full commuting support for FMAs after this was added, the coalescer can now get this right on its own.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 28 08:28:57 PDT 2016
Author: ctopper
Date: Thu Jul 28 10:28:56 2016
New Revision: 276987
URL: http://llvm.org/viewvc/llvm-project?rev=276987&view=rev
Log:
[X86] Remove CustomInserter for FMA3 instructions. It looks like, now that we have full commuting support for FMAs (which landed after this custom inserter was added), the coalescer can get this right on its own.
Differential Revision: https://reviews.llvm.org/D22799
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrFMA.td
llvm/trunk/test/CodeGen/X86/fma-do-not-commute.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=276987&r1=276986&r2=276987&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 28 10:28:56 2016
@@ -24236,164 +24236,6 @@ X86TargetLowering::EmitSjLjDispatchBlock
return BB;
}
-// Replace 213-type (isel default) FMA3 instructions with 231-type for
-// accumulator loops. Writing back to the accumulator allows the coalescer
-// to remove extra copies in the loop.
-// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
-MachineBasicBlock *
-X86TargetLowering::emitFMA3Instr(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- MachineOperand &AddendOp = MI.getOperand(3);
-
- // Bail out early if the addend isn't a register - we can't switch these.
- if (!AddendOp.isReg())
- return MBB;
-
- MachineFunction &MF = *MBB->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Check whether the addend is defined by a PHI:
- assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");
- MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg());
- if (!AddendDef.isPHI())
- return MBB;
-
- // Look for the following pattern:
- // loop:
- // %addend = phi [%entry, 0], [%loop, %result]
- // ...
- // %result<tied1> = FMA213 %m2<tied0>, %m1, %addend
-
- // Replace with:
- // loop:
- // %addend = phi [%entry, 0], [%loop, %result]
- // ...
- // %result<tied1> = FMA231 %addend<tied0>, %m1, %m2
-
- for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
- assert(AddendDef.getOperand(i).isReg());
- MachineOperand PHISrcOp = AddendDef.getOperand(i);
- MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg());
- if (&PHISrcInst == &MI) {
- // Found a matching instruction.
- unsigned NewFMAOpc = 0;
- switch (MI.getOpcode()) {
- case X86::VFMADD213PDr:
- NewFMAOpc = X86::VFMADD231PDr;
- break;
- case X86::VFMADD213PSr:
- NewFMAOpc = X86::VFMADD231PSr;
- break;
- case X86::VFMADD213SDr:
- NewFMAOpc = X86::VFMADD231SDr;
- break;
- case X86::VFMADD213SSr:
- NewFMAOpc = X86::VFMADD231SSr;
- break;
- case X86::VFMSUB213PDr:
- NewFMAOpc = X86::VFMSUB231PDr;
- break;
- case X86::VFMSUB213PSr:
- NewFMAOpc = X86::VFMSUB231PSr;
- break;
- case X86::VFMSUB213SDr:
- NewFMAOpc = X86::VFMSUB231SDr;
- break;
- case X86::VFMSUB213SSr:
- NewFMAOpc = X86::VFMSUB231SSr;
- break;
- case X86::VFNMADD213PDr:
- NewFMAOpc = X86::VFNMADD231PDr;
- break;
- case X86::VFNMADD213PSr:
- NewFMAOpc = X86::VFNMADD231PSr;
- break;
- case X86::VFNMADD213SDr:
- NewFMAOpc = X86::VFNMADD231SDr;
- break;
- case X86::VFNMADD213SSr:
- NewFMAOpc = X86::VFNMADD231SSr;
- break;
- case X86::VFNMSUB213PDr:
- NewFMAOpc = X86::VFNMSUB231PDr;
- break;
- case X86::VFNMSUB213PSr:
- NewFMAOpc = X86::VFNMSUB231PSr;
- break;
- case X86::VFNMSUB213SDr:
- NewFMAOpc = X86::VFNMSUB231SDr;
- break;
- case X86::VFNMSUB213SSr:
- NewFMAOpc = X86::VFNMSUB231SSr;
- break;
- case X86::VFMADDSUB213PDr:
- NewFMAOpc = X86::VFMADDSUB231PDr;
- break;
- case X86::VFMADDSUB213PSr:
- NewFMAOpc = X86::VFMADDSUB231PSr;
- break;
- case X86::VFMSUBADD213PDr:
- NewFMAOpc = X86::VFMSUBADD231PDr;
- break;
- case X86::VFMSUBADD213PSr:
- NewFMAOpc = X86::VFMSUBADD231PSr;
- break;
-
- case X86::VFMADD213PDYr:
- NewFMAOpc = X86::VFMADD231PDYr;
- break;
- case X86::VFMADD213PSYr:
- NewFMAOpc = X86::VFMADD231PSYr;
- break;
- case X86::VFMSUB213PDYr:
- NewFMAOpc = X86::VFMSUB231PDYr;
- break;
- case X86::VFMSUB213PSYr:
- NewFMAOpc = X86::VFMSUB231PSYr;
- break;
- case X86::VFNMADD213PDYr:
- NewFMAOpc = X86::VFNMADD231PDYr;
- break;
- case X86::VFNMADD213PSYr:
- NewFMAOpc = X86::VFNMADD231PSYr;
- break;
- case X86::VFNMSUB213PDYr:
- NewFMAOpc = X86::VFNMSUB231PDYr;
- break;
- case X86::VFNMSUB213PSYr:
- NewFMAOpc = X86::VFNMSUB231PSYr;
- break;
- case X86::VFMADDSUB213PDYr:
- NewFMAOpc = X86::VFMADDSUB231PDYr;
- break;
- case X86::VFMADDSUB213PSYr:
- NewFMAOpc = X86::VFMADDSUB231PSYr;
- break;
- case X86::VFMSUBADD213PDYr:
- NewFMAOpc = X86::VFMSUBADD231PDYr;
- break;
- case X86::VFMSUBADD213PSYr:
- NewFMAOpc = X86::VFMSUBADD231PSYr;
- break;
- default:
- llvm_unreachable("Unrecognized FMA variant.");
- }
-
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), TII.get(NewFMAOpc))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(3))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(1));
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- MI.eraseFromParent();
- }
- }
-
- return MBB;
-}
-
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -24616,39 +24458,6 @@ X86TargetLowering::EmitInstrWithCustomIn
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
- case X86::VFMADD213PDr:
- case X86::VFMADD213PSr:
- case X86::VFMADD213SDr:
- case X86::VFMADD213SSr:
- case X86::VFMSUB213PDr:
- case X86::VFMSUB213PSr:
- case X86::VFMSUB213SDr:
- case X86::VFMSUB213SSr:
- case X86::VFNMADD213PDr:
- case X86::VFNMADD213PSr:
- case X86::VFNMADD213SDr:
- case X86::VFNMADD213SSr:
- case X86::VFNMSUB213PDr:
- case X86::VFNMSUB213PSr:
- case X86::VFNMSUB213SDr:
- case X86::VFNMSUB213SSr:
- case X86::VFMADDSUB213PDr:
- case X86::VFMADDSUB213PSr:
- case X86::VFMSUBADD213PDr:
- case X86::VFMSUBADD213PSr:
- case X86::VFMADD213PDYr:
- case X86::VFMADD213PSYr:
- case X86::VFMSUB213PDYr:
- case X86::VFMSUB213PSYr:
- case X86::VFNMADD213PDYr:
- case X86::VFNMADD213PSYr:
- case X86::VFNMSUB213PDYr:
- case X86::VFNMSUB213PSYr:
- case X86::VFMADDSUB213PDYr:
- case X86::VFMADDSUB213PSYr:
- case X86::VFMSUBADD213PDYr:
- case X86::VFMSUBADD213PSYr:
- return emitFMA3Instr(MI, BB);
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =
Modified: llvm/trunk/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA.td?rev=276987&r1=276986&r2=276987&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td Thu Jul 28 10:28:56 2016
@@ -39,7 +39,6 @@ multiclass fma3p_rm<bits<8> opc, string
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
SDPatternOperator Op = null_frag> {
- let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -55,7 +54,6 @@ multiclass fma3p_rm<bits<8> opc, string
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
- let usesCustomInserter = 1 in
def Yr : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -144,7 +142,6 @@ let Constraints = "$src1 = $dst", isComm
multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode = null_frag> {
- let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
Modified: llvm/trunk/test/CodeGen/X86/fma-do-not-commute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-do-not-commute.ll?rev=276987&r1=276986&r2=276987&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-do-not-commute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-do-not-commute.ll Thu Jul 28 10:28:56 2016
@@ -9,9 +9,9 @@ target triple = "x86_64-apple-macosx"
; CHECK-NOT: {{.*}}, %xmm0
; %addr lives in rdi.
; %addr2 lives in rsi.
-; CHECK: vmovss (%rsi), [[ADDR2:%xmm[0-9]+]]
+; CHECK: vmovss (%rdi), [[ADDR:%xmm[0-9]+]]
; The assembly syntax is in the reverse order.
-; CHECK: vfmadd231ss (%rdi), [[ADDR2]], %xmm0
+; CHECK: vfmadd231ss (%rsi), [[ADDR]], %xmm0
define void @test1(float* %addr, float* %addr2, float %arg) {
entry:
br label %loop
More information about the llvm-commits mailing list