[llvm-commits] [llvm] r162898 - in /llvm/trunk: lib/Target/ARM/ARMBaseInstrInfo.cpp test/CodeGen/ARM/domain-conv-vmovs.ll
Tim Northover
Tim.Northover at arm.com
Thu Aug 30 03:17:45 PDT 2012
Author: tnorthover
Date: Thu Aug 30 05:17:45 2012
New Revision: 162898
URL: http://llvm.org/viewvc/llvm-project?rev=162898&view=rev
Log:
Add support for moving pure S-register to NEON pipeline if desired
Added:
llvm/trunk/test/CodeGen/ARM/domain-conv-vmovs.ll (with props)
Modified:
llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=162898&r1=162897&r2=162898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Thu Aug 30 05:17:45 2012
@@ -3377,7 +3377,8 @@
// converted.
if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR))
+ MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
// No other instructions can be swizzled, so just determine their domain.
@@ -3490,10 +3491,78 @@
.addReg(DReg, RegState::Undef)
.addReg(SrcReg)
.addImm(Lane));
-
+
// The destination must be marked as set.
MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
break;
+ case ARM::VMOVS: {
+ if (Domain != ExeNEON)
+ break;
+
+ // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+
+ for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
+ MI->RemoveOperand(i-1);
+
+ unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
+ DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
+ DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
+
+ if (DSrc == DDst) {
+ // Destination can be:
+ // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
+ MI->setDesc(get(ARM::VDUPLN32d));
+ AddDefaultPred(MIB.addReg(DDst, RegState::Define)
+ .addReg(DDst, RegState::Undef)
+ .addImm(SrcLane));
+
+ // Neither the source or the destination are naturally represented any
+ // more, so add them in manually.
+ MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, RegState::Implicit);
+ break;
+ }
+
+ // In general there's no single instruction that can perform an S <-> S
+ // move in NEON space, but a pair of VEXT instructions *can* do the
+ // job. It turns out that the VEXTs needed will only use DSrc once, with
+ // the position based purely on the combination of lane-0 and lane-1
+ // involved. For example
+ // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1
+ // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1
+ // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1
+ // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1
+ //
+ // Pattern of the MachineInstrs is:
+ // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
+ MachineInstrBuilder NewMIB;
+ NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(ARM::VEXTd32), DDst);
+ NewMIB.addReg(SrcLane == 1 && DstLane == 1 ? DSrc : DDst, RegState::Undef);
+ NewMIB.addReg(SrcLane == 0 && DstLane == 0 ? DSrc : DDst, RegState::Undef);
+ NewMIB.addImm(1);
+ AddDefaultPred(NewMIB);
+
+ if (SrcLane == DstLane)
+ NewMIB.addReg(SrcReg, RegState::Implicit);
+
+ MI->setDesc(get(ARM::VEXTd32));
+ MIB.addReg(DDst, RegState::Define);
+ MIB.addReg(SrcLane == 1 && DstLane == 0 ? DSrc : DDst, RegState::Undef);
+ MIB.addReg(SrcLane == 0 && DstLane == 1 ? DSrc : DDst, RegState::Undef);
+ MIB.addImm(1);
+ AddDefaultPred(MIB);
+
+ if (SrcLane != DstLane)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ // As before, the original destination is no longer represented, add it
+ // implicitly.
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
+ break;
+ }
}
}
Added: llvm/trunk/test/CodeGen/ARM/domain-conv-vmovs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/domain-conv-vmovs.ll?rev=162898&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/domain-conv-vmovs.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/domain-conv-vmovs.ll Thu Aug 30 05:17:45 2012
@@ -0,0 +1,64 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
+
+define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane0to0:
+ %vec = insertelement <2 x float> %in, float %arg, i32 0
+ %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d1, d0, #1
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+ ret <2 x float> %res
+}
+
+define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane0to1:
+ %vec = insertelement <2 x float> %in, float %arg, i32 1
+ %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vext.32 d1, d1, d0, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+ ret <2 x float> %res
+}
+
+define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane1to0:
+ %vec = insertelement <2 x float> %in, float %arg, i32 0
+ %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vext.32 d1, d0, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+ ret <2 x float> %res
+}
+
+define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
+; CHECK: test_vmovs_via_vext_lane1to1:
+ %vec = insertelement <2 x float> %in, float %arg, i32 1
+ %res = fadd <2 x float> %vec, %vec
+
+; CHECK: vext.32 d1, d0, d1, #1
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
+
+ ret <2 x float> %res
+}
+
+
+define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
+; CHECK: test_vmovs_via_vdup:
+
+ ; Do an operation (which will end up NEON because of +neonfp) to convince the
+ ; execution-domain pass that NEON is a good thing to use.
+ %res = fadd float %ret, %ret
+ ; It makes sense for LLVM to do the addition in d0 here, because it's going
+ ; to be returned. This means it will want a "vmov s0, s1":
+; CHECK: vdup.32 d0, d0[1]
+
+ ret float %res
+}
+
Propchange: llvm/trunk/test/CodeGen/ARM/domain-conv-vmovs.ll
------------------------------------------------------------------------------
svn:eol-style = native
More information about the llvm-commits
mailing list