[llvm-commits] [llvm] r48746 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/vec_extract-sse4.ll

Evan Cheng evan.cheng at apple.com
Mon Mar 24 14:52:23 PDT 2008


Author: evancheng
Date: Mon Mar 24 16:52:23 2008
New Revision: 48746

URL: http://llvm.org/viewvc/llvm-project?rev=48746&view=rev
Log:
- SSE4.1 extractps extracts an f32 into a gr32 register. Very useful! Not. Fix the instruction specification and teach the lowering code to use it only when the only use is a store instruction.

Added:
    llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=48746&r1=48745&r2=48746&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Mar 24 16:52:23 2008
@@ -699,7 +699,7 @@
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
 
     if (Subtarget->is64Bit()) {
       setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
@@ -3718,6 +3718,19 @@
     SDOperand Assert  = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
                                     DAG.getValueType(VT));
     return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+  } else if (VT == MVT::f32) {
+    // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+    // the result back to FR32 register. It's only worth matching if the
+    // result has a single use which is a store.
+    if (!Op.hasOneUse())
+      return SDOperand();
+    SDNode *User = *Op.Val->use_begin();
+    if (User->getOpcode() != ISD::STORE)
+      return SDOperand();
+    SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+                    DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
+                                    Op.getOperand(1));
+    return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
   }
   return SDOperand();
 }
@@ -3728,8 +3741,11 @@
   if (!isa<ConstantSDNode>(Op.getOperand(1)))
     return SDOperand();
 
-  if (Subtarget->hasSSE41())
-    return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+  if (Subtarget->hasSSE41()) {
+    SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+    if (Res.Val)
+      return Res;
+  }
 
   MVT::ValueType VT = Op.getValueType();
   // TODO: handle v16i8.

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=48746&r1=48745&r2=48746&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Mar 24 16:52:23 2008
@@ -3380,19 +3380,22 @@
 defm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
 
 
-/// SS41I_extractf32 - SSE 4.1 extract 32 bits to fp reg or memory destination
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
 multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
-  def rr : SS4AIi8<opc, MRMSrcReg, (outs FR32:$dst),
+  // Not worth matching to rr form of extractps since the result is in GPR32.
+  def rr : SS4AIi8<opc, MRMSrcReg, (outs GR32:$dst),
                  (ins VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr, 
                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                 [(set FR32:$dst,
-                  (extractelt (v4f32 VR128:$src1), imm:$src2))]>, OpSize;
+                 [/*(set GR32:$dst,
+                  (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
+           OpSize;
   def mr : SS4AIi8<opc, MRMDestMem, (outs), 
                  (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                  !strconcat(OpcodeStr, 
                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                 [(store (extractelt (v4f32 VR128:$src1), imm:$src2),
+                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                           addr:$dst)]>, OpSize;
 }
 

Added: llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll?rev=48746&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-sse4.ll Mon Mar 24 16:52:23 2008
@@ -0,0 +1,30 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse41 -o %t -f
+; RUN: grep extractps %t | count 1
+; RUN: grep pextrd    %t | count 2
+; RUN: grep pshufd    %t | count 1
+
+define void @t1(float* %R, <4 x float>* %P1) {
+	%X = load <4 x float>* %P1
+	%tmp = extractelement <4 x float> %X, i32 3
+	store float %tmp, float* %R
+	ret void
+}
+
+define float @t2(<4 x float>* %P1) {
+	%X = load <4 x float>* %P1
+	%tmp = extractelement <4 x float> %X, i32 2
+	ret float %tmp
+}
+
+define void @t3(i32* %R, <4 x i32>* %P1) {
+	%X = load <4 x i32>* %P1
+	%tmp = extractelement <4 x i32> %X, i32 3
+	store i32 %tmp, i32* %R
+	ret void
+}
+
+define i32 @t4(<4 x i32>* %P1) {
+	%X = load <4 x i32>* %P1
+	%tmp = extractelement <4 x i32> %X, i32 3
+	ret i32 %tmp
+}





More information about the llvm-commits mailing list