[llvm-branch-commits] [llvm-branch] r165526 - in /llvm/branches/R600: lib/Target/AMDGPU/R600ISelLowering.cpp test/CodeGen/R600/fcmp-cnd.ll test/CodeGen/R600/fcmp.ll test/CodeGen/R600/selectcc-icmp-select-float.ll

Tue Oct 9 11:49:04 PDT 2012

Author: tstellar
Date: Tue Oct  9 13:49:03 2012
New Revision: 165526

URL: http://llvm.org/viewvc/llvm-project?rev=165526&view=rev
Log:
R600: Fix lowering of fcmp

In most cases, R600 requires that all operands of SELECT_CC nodes have
the same type.  However, we were incorrectly converting between floating
point true(1.0f) / false(0.0f) and integer true(-1) / false(0),
which was causing miscompiles for fcmp instructions that were lowered to
SELECT_CC nodes.

Added:
    llvm/branches/R600/test/CodeGen/R600/fcmp-cnd.ll
    llvm/branches/R600/test/CodeGen/R600/fcmp.ll
    llvm/branches/R600/test/CodeGen/R600/selectcc-icmp-select-float.ll
Modified:
    llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=165526&r1=165525&r2=165526&view=diff
==============================================================================

--- llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp Tue Oct  9 13:49:03 2012
@@ -551,10 +551,8 @@
   EVT CompareVT = LHS.getValueType();
 
   // We need all the operands of SELECT_CC to have the same value type, so if
-  // necessary we need to convert LHS and RHS to be the same type True and
-  // False.  True and False are guaranteed to have the same type as this
-  // SELECT_CC node.
-
+  // necessary we need to change True and False to be the same type as LHS and
+  // RHS, and then convert the result of the select_cc back to the correct type.
   if (isHWTrueValue(True) && isHWFalseValue(False)) {
     if (CompareVT !=  VT) {
       if (VT == MVT::f32 && CompareVT == MVT::i32) {
@@ -563,24 +561,31 @@
             DAG.getConstant(-1, MVT::i32),
             DAG.getConstant(0, MVT::i32),
             CC);
-        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
+        // Convert integer values of true (-1) and false (0) to fp values of
+        // true (1.0f) and false (0.0f).
+        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
+                                                  DAG.getConstant(1, MVT::i32));
+        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
       } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
         SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
             LHS, RHS,
             DAG.getConstantFP(1.0f, MVT::f32),
             DAG.getConstantFP(0.0f, MVT::f32),
             CC);
-        return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
+        // Convert fp values of true (1.0f) and false (0.0f) to integer values
+        // of true (-1) and false (0).
+        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
+        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
       } else {
         // I don't think there will be any other type pairings.
         assert(!"Unhandled operand type parings in SELECT_CC");
       }
     } else {
+      // This SELECT_CC is already legal.
       return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
     }
   }
 
-
   // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
   // we can handle this with a native instruction, but we need to swap true
   // and false and change the conditional.

Added: llvm/branches/R600/test/CodeGen/R600/fcmp-cnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/R600/fcmp-cnd.ll?rev=165526&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/R600/fcmp-cnd.ll (added)
+++ llvm/branches/R600/test/CodeGen/R600/fcmp-cnd.ll Tue Oct  9 13:49:03 2012
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+  %0 = load float addrspace(1)* %in
+  %cmp = fcmp oeq float %0, 0.000000e+00
+  %value = select i1 %cmp, i32 2, i32 3 
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/R600/fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/R600/fcmp.ll?rev=165526&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/R600/fcmp.ll (added)
+++ llvm/branches/R600/test/CodeGen/R600/fcmp.ll Tue Oct  9 13:49:03 2012
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: SETE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MOV T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
+entry:
+  %0 = load float addrspace(1)* %in
+  %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
+  %1 = load float addrspace(1)* %arrayidx1
+  %cmp = fcmp oeq float %0, %1
+  %sext = sext i1 %cmp to i32
+  store i32 %sext, i32 addrspace(1)* %out
+  ret void
+}

Added: llvm/branches/R600/test/CodeGen/R600/selectcc-icmp-select-float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/test/CodeGen/R600/selectcc-icmp-select-float.ll?rev=165526&view=auto
==============================================================================
--- llvm/branches/R600/test/CodeGen/R600/selectcc-icmp-select-float.ll (added)
+++ llvm/branches/R600/test/CodeGen/R600/selectcc-icmp-select-float.ll Tue Oct  9 13:49:03 2012
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Note additional optimizations may cause this SGT to be replaced with a
+; CND* instruction.
+; CHECK: SGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; Test a selectcc with i32 LHS/RHS and float True/False
+
+define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %0 = load i32 addrspace(1)* %in
+  %1 = icmp sge i32 %0, 0
+  %2 = select i1 %1, float 1.0, float 0.0
+  store float %2, float addrspace(1)* %out
+  ret void
+}