[llvm] r213342 - X86: Constant fold converting vector setcc results to float.

Patrik Hägglund H patrik.h.hagglund at ericsson.com
Wed Jul 23 07:45:46 PDT 2014


Hi Jim,

This commit is causing a regression as shown by llvm-stress:

> bin/llvm-stress -size 300 -seed 30215 | bin/llc -march=x86-64 -mcpu=corei7 -o /dev/null
llc: ../lib/CodeGen/SelectionDAG/SelectionDAG.cpp:2905: llvm::SDValue llvm::SelectionDAG::getNode(unsigned int, llvm::SDLoc, llvm::EVT, llvm::SDValue): Assertion `VT.getSizeInBits() == Operand.getValueType().getSizeInBits() && "Cannot BITCAST between types of different sizes!"' failed.
0  llc             0x00000000011e2fa5 llvm::sys::PrintStackTrace(_IO_FILE*) + 37
1  llc             0x00000000011e33e3
2  libpthread.so.0 0x00007f52d8be17c0
3  libc.so.6       0x00007f52d7ee5b35 gsignal + 53
4  libc.so.6       0x00007f52d7ee7111 abort + 385
5  libc.so.6       0x00007f52d7ede9f0 __assert_fail + 240
6  llc             0x000000000105a902
7  llc             0x0000000000b55feb llvm::X86TargetLowering::PerformDAGCombine(llvm::SDNode*, llvm::TargetLowering::DAGCombinerInfo&) const + 11771
8  llc             0x0000000000fd8fae
9  llc             0x0000000000fd88eb llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::AliasAnalysis&, llvm::CodeGenOpt::Level) + 939
10 llc             0x00000000010ccb5e llvm::SelectionDAGISel::CodeGenAndEmitDAG() + 910
11 llc             0x00000000010cbd98 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) + 7096
12 llc             0x00000000010c9444 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) + 1332
13 llc             0x0000000000ae0ab6
14 llc             0x0000000000ce7d2c llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 124
15 llc             0x0000000000ee7bca llvm::FPPassManager::runOnFunction(llvm::Function&) + 362
16 llc             0x0000000000ee7e5b llvm::FPPassManager::runOnModule(llvm::Module&) + 43
17 llc             0x0000000000ee83f7 llvm::legacy::PassManagerImpl::run(llvm::Module&) + 999
18 llc             0x0000000000570690 main + 6832
19 libc.so.6       0x00007f52d7ed1c16 __libc_start_main + 230
20 llc             0x000000000056eaf9
Stack dump:
0.      Program arguments: bin/llc -march=x86-64 -mcpu=corei7 -o /dev/null
1.      Running pass 'Function Pass Manager' on module '<stdin>'.
2.      Running pass 'X86 DAG->DAG Instruction Selection' on function '@autogen_SD30215'
Abort

/Patrik Hägglund

-----Original Message-----
From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of Jim Grosbach
Sent: den 18 juli 2014 02:41
To: llvm-commits at cs.uiuc.edu
Subject: [llvm] r213342 - X86: Constant fold converting vector setcc results to float.

Author: grosbach
Date: Thu Jul 17 19:40:56 2014
New Revision: 213342

URL: http://llvm.org/viewvc/llvm-project?rev=213342&view=rev
Log:
X86: Constant fold converting vector setcc results to float.

Since the result of a SETCC for X86 is 0 or -1 in each lane, we can
move unary operations, in this case [su]int_to_fp through the mask
operation and constant fold the operation away. Generally speaking:
  UNARYOP(AND(VECTOR_CMP(x,y), constant))
      --> AND(VECTOR_CMP(x,y), constant2)
where constant2 is UNARYOP(constant).

This implements the transform where UNARYOP is [su]int_to_fp.

For example, consider the simple function:
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
  %cmp = fcmp oeq <4 x float> %val, %test
  %ext = zext <4 x i1> %cmp to <4 x i32>
  %result = sitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}

Before this change, the SSE code is generated as:
LCPI0_0:
  .long 1                       ## 0x1
  .long 1                       ## 0x1
  .long 1                       ## 0x1
  .long 1                       ## 0x1
  .section  __TEXT,__text,regular,pure_instructions
  .globl  _foo
  .align  4, 0x90
_foo:                                   ## @foo
  cmpeqps %xmm1, %xmm0
  andps LCPI0_0(%rip), %xmm0
  cvtdq2ps  %xmm0, %xmm0
  retq

After, the code is improved to:
LCPI0_0:
  .long 1065353216              ## float 1.000000e+00
  .long 1065353216              ## float 1.000000e+00
  .long 1065353216              ## float 1.000000e+00
  .long 1065353216              ## float 1.000000e+00
  .section  __TEXT,__text,regular,pure_instructions
  .globl  _foo
  .align  4, 0x90
_foo:                                   ## @foo
  cmpeqps %xmm1, %xmm0
  andps LCPI0_0(%rip), %xmm0
  retq

The cvtdq2ps has been constant folded away and the floating point 1.0f
vector lanes are materialized directly via the ModRM operand of andps.

Added:
    llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=213342&r1=213341&r2=213342&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 17 19:40:56 2014
@@ -21847,8 +21847,59 @@ static SDValue PerformBrCondCombine(SDNo
   return SDValue();
 }
 
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+                                                         SelectionDAG &DAG) {
+  // Take advantage of vector comparisons producing 0 or -1 in each lane to
+  // optimize away operation when it's from a constant.
+  //
+  // The general transformation is:
+  //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+  //       AND(VECTOR_CMP(x,y), constant2)
+  //    constant2 = UNARYOP(constant)
+
+  // Early exit if this isn't a vector operation or if the operand of the
+  // unary operation isn't a bitwise AND.
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Now check that the other operand of the AND is a constant splat. We could
+  // make the transformation for non-constant splats as well, but it's unclear
+  // that would be a benefit as it would not eliminate any operations, just
+  // perform one more step in scalar code before moving to the vector unit.
+  if (BuildVectorSDNode *BV =
+          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+    // Bail out if the vector isn't a constant splat.
+    if (!BV->getConstantSplatNode())
+      return SDValue();
+
+    // Everything checks out. Build up the new and improved node.
+    SDLoc DL(N);
+    EVT IntVT = BV->getValueType(0);
+    // Create a new constant of the appropriate type for the transformed
+    // DAG.
+    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+    // The AND node needs bitcasts to/from an integer vector type around it.
+    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+                                 N->getOperand(0)->getOperand(0), MaskConst);
+    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+    return Res;
+  }
+
+  return SDValue();
+}
+
 static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
                                         const X86TargetLowering *XTLI) {
+  // First try to optimize away the conversion entirely when it's
+  // conditionally from a constant. Vectors only.
+  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+  if (Res != SDValue())
+    return Res;
+
+  // Now move on to more general possibilities.
   SDValue Op0 = N->getOperand(0);
   EVT InVT = Op0->getValueType(0);
 

Added: llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll?rev=213342&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll (added)
+++ llvm/trunk/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll Thu Jul 17 19:40:56 2014
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: LCPI0_0
+; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
+; CHECK-LABEL: foo:
+; CHECK: cmpeqps %xmm1, %xmm0
+; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
+; CHECK-NEXT: retq
+
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %result = sitofp <4 x i32> %ext to <4 x float>
+  ret <4 x float> %result
+}


_______________________________________________
llvm-commits mailing list
llvm-commits at cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list