[llvm-commits] [llvm] r171148 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/v8i1-masks.ll

Demikhovsky, Elena elena.demikhovsky at intel.com
Wed Jan 2 04:01:13 PST 2013


Hi Nadav,

After this patch the generated code is wrong. For the function below, llc emits only the multiply, a single compare, and an 'and' with a constant mask; the fadd, the fsub, and the second compare (%res2) have disappeared entirely, so the result no longer depends on both masks.

define <8 x i32> @test(<8 x float> %a, <8 x float> %b) {
  %c1 = fadd <8 x float> %a, %b
  %b1 = fmul <8 x float> %b, %a
  %d  = fsub <8 x float> %b1, %c1
  %res1 = fcmp olt <8 x float> %a, %b1
  %res2 = fcmp olt <8 x float> %c1, %d
  %andr = and <8 x i1>%res1, %res2
  %ex = zext <8 x i1> %andr to <8 x i32>
  ret <8 x i32>%ex
}

W:\LLVM_org\build64\bin\Debug>w:\AVX3\build64\bin\Debug\llc.exe < x.ll -mtriple=x86_64-apple-darwin
        .section        __TEXT,__text,regular,pure_instructions
        .section        __TEXT,__const
        .align  5
LCPI0_0:
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _test
        .align  4, 0x90
_test:                                  ## @test
        .cfi_startproc
## BB#0:
        vmulps  %ymm0, %ymm1, %ymm1
        vcmpltps        %ymm1, %ymm0, %ymm0
        vandps  LCPI0_0(%rip), %ymm0, %ymm0
        ret
        .cfi_endproc

- Elena

-----Original Message-----
From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Nadav Rotem
Sent: Thursday, December 27, 2012 10:16
To: llvm-commits at cs.uiuc.edu
Subject: [llvm-commits] [llvm] r171148 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/v8i1-masks.ll

Author: nadav
Date: Thu Dec 27 02:15:45 2012
New Revision: 171148

URL: http://llvm.org/viewvc/llvm-project?rev=171148&view=rev
Log:
On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM-sized register. In most cases we actually compare or select YMM-sized registers, and mixing the two types creates horrible code. This commit optimizes some of the transition sequences.

PR14657.
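
To make the "transition sequences" concrete, here is an IR-level sketch of the zero-extend case (my own illustration, not part of the commit; @narrow and @wide are illustrative names). On the DAG, the trunc/zext pair below is what type legalization inserts around a v8i1 'and':

; Narrow form: the 128-bit (v8i16) mask op sits between XMM<->YMM
; transitions from the 256-bit values.
define <8 x i32> @narrow(<8 x i32> %x, <8 x i32> %y) {
  %xt = trunc <8 x i32> %x to <8 x i16>
  %yt = trunc <8 x i32> %y to <8 x i16>
  %m  = and <8 x i16> %xt, %yt
  %w  = zext <8 x i16> %m to <8 x i32>
  ret <8 x i32> %w
}

; Wide form: the op runs directly on the 256-bit values, then each lane
; is masked down to the narrow element width.
define <8 x i32> @wide(<8 x i32> %x, <8 x i32> %y) {
  %m = and <8 x i32> %x, %y
  %w = and <8 x i32> %m, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  ret <8 x i32> %w
}

The 65535 splat corresponds to APInt::getAllOnesValue(16) zero-extended to 32 bits, as built in the ZERO_EXTEND case of WidenMaskArithmetic below.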

Added:
    llvm/trunk/test/CodeGen/X86/v8i1-masks.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=171148&r1=171147&r2=171148&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 27 02:15:45 2012
@@ -15731,9 +15731,92 @@
   return false;
 }
 
+// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
+// register. In most cases we actually compare or select YMM-sized registers
+// and mixing the two types creates horrible code. This method optimizes
+// some of the transition sequences.
+static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (VT.getSizeInBits() != 256)
+    return SDValue();
+
+  assert((N->getOpcode() == ISD::ANY_EXTEND ||
+          N->getOpcode() == ISD::ZERO_EXTEND ||
+          N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");
+
+  SDValue Narrow = N->getOperand(0);
+  EVT NarrowVT = Narrow->getValueType(0);
+  if (NarrowVT.getSizeInBits() != 128)
+    return SDValue();
+
+  if (Narrow->getOpcode() != ISD::XOR &&
+      Narrow->getOpcode() != ISD::AND &&
+      Narrow->getOpcode() != ISD::OR)
+    return SDValue();
+
+  SDValue N0  = Narrow->getOperand(0);
+  SDValue N1  = Narrow->getOperand(1);
+  DebugLoc DL = Narrow->getDebugLoc();
+
+  // The Left side has to be a trunc.
+  if (N0.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  // The type of the truncated inputs.
+  EVT WideVT = N0->getOperand(0)->getValueType(0);
+  if (WideVT != VT)
+    return SDValue();
+
+  // The right side has to be a 'trunc' or a constant vector.
+  bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
+  bool RHSConst = (isSplatVector(N1.getNode()) &&
+                   isa<ConstantSDNode>(N1->getOperand(0)));
+  if (!RHSTrunc && !RHSConst)
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
+    return SDValue();
+
+  // Set N0 and N1 to hold the inputs to the new wide operation.
+  N0 = N0->getOperand(0);
+  if (RHSConst) {
+    N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+                     N1->getOperand(0));
+    SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
+    N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+  } else if (RHSTrunc) {
+    N1 = N1->getOperand(0);
+  }
+
+  // Generate the wide operation.
+  SDValue Op = DAG.getNode(N->getOpcode(), DL, WideVT, N0, N1);
+  unsigned Opcode = N->getOpcode();
+  switch (Opcode) {
+  case ISD::ANY_EXTEND:
+    return Op;
+  case ISD::ZERO_EXTEND: {
+    unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+    APInt Mask = APInt::getAllOnesValue(InBits);
+    Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+    return DAG.getNode(ISD::AND, DL, VT,
+                       Op, DAG.getConstant(Mask, VT));
+  }
+  case ISD::SIGN_EXTEND:
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
+                       Op, DAG.getValueType(NarrowVT));
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
 static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
@@ -15741,8 +15824,6 @@
   if (R.getNode())
     return R;
 
-  EVT VT = N->getValueType(0);
-
   // Create BLSI, and BLSR instructions
   // BLSI is X & (-X)
   // BLSR is X & (X-1)
@@ -15803,6 +15884,7 @@
 static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
@@ -15810,8 +15892,6 @@
   if (R.getNode())
     return R;
 
-  EVT VT = N->getValueType(0);
-
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
@@ -15991,6 +16071,7 @@
 static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
@@ -16004,8 +16085,6 @@
   if (!Subtarget->hasBMI())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-
   if (VT != MVT::i32 && VT != MVT::i64)
     return SDValue();
 
@@ -16671,6 +16750,12 @@
   EVT OpVT = Op.getValueType();
   DebugLoc dl = N->getDebugLoc();
 
+  if (VT.isVector() && VT.getSizeInBits() == 256) {
+    SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+    if (R.getNode())
+      return R;
+  }
+
   if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
       (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
 
@@ -16768,15 +16853,21 @@
       N0.hasOneUse() &&
       N0.getOperand(0).hasOneUse()) {
     SDValue N00 = N0.getOperand(0);
-    if (N00.getOpcode() != X86ISD::SETCC_CARRY)
-      return SDValue();
-    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (!C || C->getZExtValue() != 1)
-      return SDValue();
-    return DAG.getNode(ISD::AND, dl, VT,
-                       DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
-                                   N00.getOperand(0), N00.getOperand(1)),
-                       DAG.getConstant(1, VT));
+    if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+      if (!C || C->getZExtValue() != 1)
+        return SDValue();
+      return DAG.getNode(ISD::AND, dl, VT,
+                         DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+                                     N00.getOperand(0), N00.getOperand(1)),
+                         DAG.getConstant(1, VT));
+    }
+  }
+
+  if (VT.isVector() && VT.getSizeInBits() == 256) {
+    SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
+    if (R.getNode())
+      return R;
   }
 
   // Optimize vectors in AVX mode:

Added: llvm/trunk/test/CodeGen/X86/v8i1-masks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/v8i1-masks.ll?rev=171148&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/v8i1-masks.ll (added)
+++ llvm/trunk/test/CodeGen/X86/v8i1-masks.ll Thu Dec 27 02:15:45 2012
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: and_masks
+;CHECK: vmovups
+;CHECK-NEXT: vcmpltp
+;CHECK-NEXT: vandps
+;CHECK-NEXT: vmovups
+;CHECK: ret
+
+define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 16
+  %v1 = load <8 x float>* %b, align 16
+  %m0 = fcmp olt <8 x float> %v1, %v0
+  %v2 = load <8 x float>* %c, align 16
+  %m1 = fcmp olt <8 x float> %v2, %v0
+  %mand = and <8 x i1> %m1, %m0
+  %r = zext <8 x i1> %mand to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* undef, align 16
+  ret void
+}
+
+;CHECK: neg_mask
+;CHECK:  vmovups
+;CHECK-NEXT: vcmpltps
+;CHECK-NEXT: vandps
+;CHECK-NEXT: vmovups
+;CHECK: ret
+
+define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+  %v0 = load <8 x float>* %a, align 16
+  %v1 = load <8 x float>* %b, align 16
+  %m0 = fcmp olt <8 x float> %v1, %v0
+  %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+  %r = zext <8 x i1> %mand to <8 x i32>
+  store <8 x i32> %r, <8 x i32>* undef, align 16
+  ret void
+}
+
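
A matching sketch for the SIGN_EXTEND case (again my own illustration, with illustrative names, not from the commit): the same narrow pattern with sext instead of zext is rewritten to the wide op followed by SIGN_EXTEND_INREG from the narrow element type, which in IR terms is a shift-left/arithmetic-shift-right pair:

define <8 x i32> @narrow_sext(<8 x i32> %x, <8 x i32> %y) {
  %xt = trunc <8 x i32> %x to <8 x i16>
  %yt = trunc <8 x i32> %y to <8 x i16>
  %m  = and <8 x i16> %xt, %yt
  %w  = sext <8 x i16> %m to <8 x i32>
  ret <8 x i32> %w
}

; ...becomes the equivalent of:
define <8 x i32> @wide_sext(<8 x i32> %x, <8 x i32> %y) {
  %m = and <8 x i32> %x, %y
  %s = shl <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %w = ashr <8 x i32> %s, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  ret <8 x i32> %w
}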

