[llvm-commits] [llvm] r127518 - in /llvm/trunk: lib/Target/ARM/ARMFastISel.cpp test/CodeGen/ARM/fast-isel-pred.ll

Eric Christopher echristo at apple.com
Fri Mar 11 17:09:29 PST 2011


Author: echristo
Date: Fri Mar 11 19:09:29 2011
New Revision: 127518

URL: http://llvm.org/viewvc/llvm-project?rev=127518&view=rev
Log:
Sometimes isPredicable lies to us and tells us we don't need the operands.
Go ahead and add them on when we might want to use them and let
later passes remove them.

Fixes rdar://9118569

Added:
    llvm/trunk/test/CodeGen/ARM/fast-isel-pred.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMFastISel.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFastISel.cpp?rev=127518&r1=127517&r2=127518&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFastISel.cpp Fri Mar 11 19:09:29 2011
@@ -123,14 +123,15 @@
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm);
-    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
-                                    const TargetRegisterClass *RC,
-                                    uint64_t Imm);
     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm);
+    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    uint64_t Imm);
+
     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx);
@@ -193,6 +194,7 @@
 
     // OptionalDef handling routines.
   private:
+    bool isARMNEONPred(const MachineInstr *MI);
     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
     void AddLoadStoreOperands(EVT VT, Address &Addr,
@@ -221,6 +223,21 @@
   return true;
 }
 
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  
+  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+       AFI->isThumb2Function())
+    return false;
+  
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
+    if (TID.OpInfo[i].isPredicate())
+      return true;
+  
+  return false;
+}
+
 // If the machine is predicable go ahead and add the predicate operands, if
 // it needs default CC operands add those.
 // TODO: If we want to support thumb1 then we'll need to deal with optional
@@ -230,10 +247,12 @@
 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   MachineInstr *MI = &*MIB;
 
-  // Do we use a predicate?
-  if (TII.isPredicable(MI))
+  // Do we use a predicate? or...
+  // Are we NEON in ARM mode and have a predicate operand? If so, I know
+  // we're not predicable but add it anyways.
+  if (TII.isPredicable(MI) || isARMNEONPred(MI))
     AddDefaultPred(MIB);
-
+  
   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
   bool CPSR = false;

Added: llvm/trunk/test/CodeGen/ARM/fast-isel-pred.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fast-isel-pred.ll?rev=127518&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fast-isel-pred.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/fast-isel-pred.ll Fri Mar 11 19:09:29 2011
@@ -0,0 +1,60 @@
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-apple-darwin"
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  %X = alloca <4 x i32>, align 16
+  %Y = alloca <4 x float>, align 16
+  store i32 0, i32* %retval
+  %tmp = load <4 x i32>* %X, align 16
+  call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
+  %0 = load i32* %retval
+  ret i32 %0
+}
+
+define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %__a.addr.i = alloca <4 x i32>, align 16
+  %v.addr = alloca <4 x i32>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  store <4 x i32> %v, <4 x i32>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load <4 x i32>* %v.addr, align 16
+  store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
+  %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+  %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
+  %1 = bitcast <16 x i8> %0 to <4 x i32>
+  %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %tmp3 = load <4 x float>** %constants.addr, align 4
+  call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
+  ret void
+}
+
+define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %v.addr = alloca <4 x float>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  %data = alloca i64, align 4
+  store <4 x float> %v, <4 x float>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load i64* %data, align 4
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
+  %0 = bitcast i8* %add.ptr to i64*
+  %arrayidx = getelementptr inbounds i64* %0, i32 0
+  store i64 %tmp, i64* %arrayidx
+  ret void
+}





More information about the llvm-commits mailing list