[llvm-commits] [llvm] r162160 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2012-08-16-setcc.ll test/CodeGen/X86/fold-load.ll

Nadav Rotem nrotem at apple.com
Fri Aug 17 19:43:28 PDT 2012


Author: nadav
Date: Fri Aug 17 21:43:28 2012
New Revision: 162160

URL: http://llvm.org/viewvc/llvm-project?rev=162160&view=rev
Log:
The X86 backend has a number of optimizations for SETCC nodes which use
arithmetic instructions. However, when small data types are used, a truncate
node appears between the SETCC node and the arithmetic operation. This patch
adds support for this pattern.

Before:
  xorl  %esi, %edi
  testb %dil, %dil
  setne %al
  ret

After:
  xorb  %dil, %sil
  setne %al
  ret

rdar://12081007


Added:
    llvm/trunk/test/CodeGen/X86/2012-08-16-setcc.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/fold-load.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=162160&r1=162159&r2=162160&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 17 21:43:28 2012
@@ -8283,7 +8283,18 @@
 
   unsigned Opcode = 0;
   unsigned NumOperands = 0;
-  switch (Op.getNode()->getOpcode()) {
+
+  // Truncate operations may prevent the merge of the SETCC instruction
+  // and the arithmetic instruction before it. Attempt to truncate the operands
+  // of the arithmetic instruction and use a reduced bit-width instruction.
+  bool NeedTruncation = false;
+  unsigned InOpcode = Op.getNode()->getOpcode();
+  if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+    NeedTruncation = true;
+    InOpcode = Op->getOperand(0)->getOpcode();
+  }
+
+  switch (InOpcode) {
   case ISD::ADD:
     // Due to an isel shortcoming, be conservative if this add is likely to be
     // selected as part of a load-modify-store instruction. When the root node
@@ -8339,7 +8350,7 @@
 
       if (User->getOpcode() != ISD::BRCOND &&
           User->getOpcode() != ISD::SETCC &&
-          (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+          !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
         NonFlagUse = true;
         break;
       }
@@ -8360,11 +8371,9 @@
         goto default_case;
 
     // Otherwise use a regular EFLAGS-setting instruction.
-    switch (Op.getNode()->getOpcode()) {
+    switch (InOpcode) {
     default: llvm_unreachable("unexpected operator!");
-    case ISD::SUB:
-      Opcode = X86ISD::SUB;
-      break;
+    case ISD::SUB: Opcode = X86ISD::SUB; break;
     case ISD::OR:  Opcode = X86ISD::OR;  break;
     case ISD::XOR: Opcode = X86ISD::XOR; break;
     case ISD::AND: Opcode = X86ISD::AND; break;
@@ -8385,6 +8394,34 @@
     break;
   }
 
+  if (NeedTruncation) {
+    SDValue WideVal = Op->getOperand(0);
+    EVT VT = Op.getValueType();
+    EVT WideVT = WideVal.getValueType();
+    unsigned ConvertedOp = 0;
+
+    // Use a target machine opcode to prevent further DAGCombine
+    // optimizations that may separate the arithmetic operations from the
+    // setcc node.
+    switch (WideVal.getOpcode()) {
+      default: break;
+      case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
+      case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
+      case ISD::AND: ConvertedOp = X86ISD::AND; break;
+      case ISD::OR:  ConvertedOp = X86ISD::OR;  break;
+      case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
+    }
+
+    if (ConvertedOp && WideVal.hasOneUse()) {
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
+        SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
+        SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
+        Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
+      }
+    }
+  }
+
   if (Opcode == 0)
     // Emit a CMP with 0, which is the TEST pattern.
     return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,

Added: llvm/trunk/test/CodeGen/X86/2012-08-16-setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-08-16-setcc.ll?rev=162160&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-08-16-setcc.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2012-08-16-setcc.ll Fri Aug 17 21:43:28 2012
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+
+; rdar://12081007
+
+; CHECK: and_1:
+; CHECK: andb
+; CHECK-NEXT: cmovnel
+; CHECK: ret
+define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
+  %1 = and i8 %b, %a
+  %2 = icmp ne i8 %1, 0
+  %3 = select i1 %2, i32 %x, i32 0
+  ret i32 %3
+}
+
+; CHECK: and_2:
+; CHECK: andb
+; CHECK-NEXT: setne
+; CHECK: ret
+define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) {
+  %1 = and i8 %b, %a
+  %2 = icmp ne i8 %1, 0
+  ret i1 %2
+}
+
+; CHECK: xor_1:
+; CHECK: xorb
+; CHECK-NEXT: cmovnel
+; CHECK: ret
+define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) {
+  %1 = xor i8 %b, %a
+  %2 = icmp ne i8 %1, 0
+  %3 = select i1 %2, i32 %x, i32 0
+  ret i32 %3
+}
+
+; CHECK: xor_2:
+; CHECK: xorb
+; CHECK-NEXT: setne
+; CHECK: ret
+define zeroext i1 @xor_2(i8 zeroext %a, i8 zeroext %b) {
+  %1 = xor i8 %b, %a
+  %2 = icmp ne i8 %1, 0
+  ret i1 %2
+}

Modified: llvm/trunk/test/CodeGen/X86/fold-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-load.ll?rev=162160&r1=162159&r2=162160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-load.ll Fri Aug 17 21:43:28 2012
@@ -57,13 +57,13 @@
   %0 = load i32* %P, align 4
   %1 = load i32* %Q, align 4
   %2 = xor i32 %0, %1
-  %3 = and i32 %2, 65535
+  %3 = and i32 %2, 89947
   %4 = icmp eq i32 %3, 0
   br i1 %4, label %exit, label %land.end
 
 exit:
   %shr.i.i19 = xor i32 %1, %0
-  %5 = and i32 %shr.i.i19, 2147418112
+  %5 = and i32 %shr.i.i19, 3456789123
   %6 = icmp eq i32 %5, 0
   br label %land.end
 





More information about the llvm-commits mailing list