[llvm] r217799 - [X86] Fix a bug in X86's peephole optimization.
Akira Hatanaka
ahatanaka at apple.com
Mon Sep 15 11:23:52 PDT 2014
Author: ahatanak
Date: Mon Sep 15 13:23:52 2014
New Revision: 217799
URL: http://llvm.org/viewvc/llvm-project?rev=217799&view=rev
Log:
[X86] Fix a bug in X86's peephole optimization.
Peephole optimization was folding MOVSDrm, which is a zero-extending double
precision floating point load, into ADDPDrr, which is a SIMD add of two packed
double precision floating point values.
(before)
%vreg21<def> = MOVSDrm <fi#0>, 1, %noreg, 0, %noreg; mem:LD8[%7](align=16)(tbaa=<badref>) VR128:%vreg21
%vreg23<def,tied1> = ADDPDrr %vreg20<tied0>, %vreg21; VR128:%vreg23,%vreg20,%vreg21
(after)
%vreg23<def,tied1> = ADDPDrm %vreg20<tied0>, <fi#0>, 1, %noreg, 0, %noreg; mem:LD8[%7](align=16)(tbaa=<badref>) VR128:%vreg23,%vreg20
X86InstrInfo::foldMemoryOperandImpl already had the logic that prevented this
from happening. However the check wasn't being conducted for loads from stack
objects. This commit factors out the logic into a new function and uses it for
checking loads from stack slots are not zero-extending loads.
rdar://problem/18236850
Added:
llvm/trunk/test/CodeGen/X86/peephole-fold-movsd.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=217799&r1=217798&r2=217799&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Sep 15 13:23:52 2014
@@ -4423,6 +4423,25 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}
+static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineFunction &MF) {
+ unsigned Opc = LoadMI.getOpcode();
+ unsigned RegSize =
+ MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
+
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+ // These instructions only load 32 bits, we can't fold them if the
+ // destination register is wider than 32 bits (4 bytes).
+ return true;
+
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+ // These instructions only load 64 bits, we can't fold them if the
+ // destination register is wider than 64 bits (8 bytes).
+ return true;
+
+ return false;
+}
+
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
@@ -4430,8 +4449,11 @@ MachineInstr* X86InstrInfo::foldMemoryOp
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
- if (isLoadFromStackSlot(LoadMI, FrameIndex))
+ if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ if (isPartialRegisterLoad(*LoadMI, MF))
+ return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ }
// Check switch flag
if (NoFusing) return nullptr;
@@ -4542,19 +4564,7 @@ MachineInstr* X86InstrInfo::foldMemoryOp
break;
}
default: {
- if ((LoadMI->getOpcode() == X86::MOVSSrm ||
- LoadMI->getOpcode() == X86::VMOVSSrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 4)
- // These instructions only load 32 bits, we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return nullptr;
- if ((LoadMI->getOpcode() == X86::MOVSDrm ||
- LoadMI->getOpcode() == X86::VMOVSDrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 8)
- // These instructions only load 64 bits, we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
+ if (isPartialRegisterLoad(*LoadMI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.
Added: llvm/trunk/test/CodeGen/X86/peephole-fold-movsd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peephole-fold-movsd.ll?rev=217799&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/peephole-fold-movsd.ll (added)
+++ llvm/trunk/test/CodeGen/X86/peephole-fold-movsd.ll Mon Sep 15 13:23:52 2014
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+;
+; Check that x86's peephole optimization doesn't fold a 64-bit load (movsd) into
+; addpd.
+; rdar://problem/18236850
+
+%struct.S1 = type { double, double }
+
+ at g = common global %struct.S1 zeroinitializer, align 8
+
+declare void @foo3(%struct.S1*)
+
+; CHECK: movsd (%rsp), [[R0:%xmm[0-9]+]]
+; CHECK: addpd [[R0]], %xmm{{[0-9]+}}
+
+define void @foo1(double %a.coerce0, double %a.coerce1, double %b.coerce0, double %b.coerce1) {
+ %1 = alloca <2 x double>, align 16
+ %tmpcast = bitcast <2 x double>* %1 to %struct.S1*
+ call void @foo3(%struct.S1* %tmpcast) #2
+ %p2 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 0
+ %2 = load double* %p2, align 16
+ %p3 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 1
+ %3 = load double* %p3, align 8
+ %4 = insertelement <2 x double> undef, double %2, i32 0
+ %5 = insertelement <2 x double> %4, double 0.000000e+00, i32 1
+ %6 = insertelement <2 x double> undef, double %3, i32 1
+ %7 = insertelement <2 x double> %6, double 1.000000e+00, i32 0
+ %8 = fadd <2 x double> %5, %7
+ store <2 x double> %8, <2 x double>* bitcast (%struct.S1* @g to <2 x double>*), align 16
+ ret void
+}
More information about the llvm-commits
mailing list