[llvm-commits] [PATCH] Fix sext(setcc()) => setcc()

Tue Jun 26 04:35:10 PDT 2012

When folding a floating-point setcc of a value against itself, we were always emitting 0/1 booleans, even on targets that use 0/-1 booleans.

Moved test to "make check" regression tests as requested.

I have also attached a version of the test for the nightly test suite, in case
you want it; otherwise, the bugfix and regression test are sufficient on their own.

Please commit.
-------------- next part --------------
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================

--- lib/CodeGen/SelectionDAG/TargetLowering.cpp	(revision 159148)
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp	(working copy)
@@ -2389,21 +2389,29 @@
   }
 
   if (N0 == N1) {
+    // The sext(setcc()) => setcc() optimization relies on the appropriate
+    // constant being emitted.
+    uint64_t EqVal;
+    switch (getBooleanContents(N0.getValueType().isVector())) {
+    default: llvm_unreachable("Unknown boolean contents!");
+    case UndefinedBooleanContent:
+    case ZeroOrOneBooleanContent:
+      EqVal = ISD::isTrueWhenEqual(Cond);
+      break;
+    case ZeroOrNegativeOneBooleanContent:
+      EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+      break;
+    }
+
     // We can always fold X == X for integer setcc's.
     if (N0.getValueType().isInteger()) {
-      switch (getBooleanContents(N0.getValueType().isVector())) {
-      case UndefinedBooleanContent: 
-      case ZeroOrOneBooleanContent: 
-        return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
-      case ZeroOrNegativeOneBooleanContent:
-        return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
-      }
+      return DAG.getConstant(EqVal, VT);
     }
     unsigned UOF = ISD::getUnorderedFlavor(Cond);
     if (UOF == 2)   // FP operators that are undefined on NaNs.
-      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+      return DAG.getConstant(EqVal, VT);
     if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
-      return DAG.getConstant(UOF, VT);
+      return DAG.getConstant(EqVal, VT);
     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
     // if it is not already.
     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
Index: test/CodeGen/X86/sext-setcc-self.ll
===================================================================
--- test/CodeGen/X86/sext-setcc-self.ll	(revision 0)
+++ test/CodeGen/X86/sext-setcc-self.ll	(revision 0)
@@ -0,0 +1,55 @@
+; RUN: llc -march=x86 -mcpu=nehalem < %s | FileCheck %s
+
+define <4 x i32> @test_ueq(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ueq <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_uge(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp uge <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ule(<4 x float> %in) {
+entry:
+  ; CHECK: pcmpeqd %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ule <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_one(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp one <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ogt(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp ogt <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @test_olt(<4 x float> %in) {
+entry:
+  ; CHECK: xorps %xmm0, %xmm0
+  ; CHECK-NEXT: ret
+  %0 = fcmp olt <4 x float> %in, %in
+  %1 = sext <4 x i1> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
-------------- next part --------------
Index: LLVMSource/sext-setcc-self.ll
===================================================================
--- LLVMSource/sext-setcc-self.ll	(revision 0)
+++ LLVMSource/sext-setcc-self.ll	(revision 0)
@@ -0,0 +1,67 @@
+define <4 x i32> @test_ueq(<4 x float> %val) {
+entry:
+  %cmp = fcmp ueq <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_uge(<4 x float> %val) {
+entry:
+  %cmp = fcmp uge <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_ule(<4 x float> %val) {
+entry:
+  %cmp = fcmp ule <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_one(<4 x float> %val) {
+entry:
+  %cmp = fcmp one <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_ogt(<4 x float> %val) {
+entry:
+  %cmp = fcmp ogt <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_olt(<4 x float> %val) {
+entry:
+  %cmp = fcmp olt <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define i32 @main() nounwind {
+  ; test unordered equality comparisons (all ones)
+  %res_ueq = call <4 x i32> @test_ueq(<4 x float> undef)
+  %res_uge = call <4 x i32> @test_uge(<4 x float> undef)
+  %res_ule = call <4 x i32> @test_ule(<4 x float> undef)
+
+  ; test ordered inequality comparisons (all zeros)
+  %res_one = call <4 x i32> @test_one(<4 x float> undef)
+  %res_ogt = call <4 x i32> @test_ogt(<4 x float> undef)
+  %res_olt = call <4 x i32> @test_olt(<4 x float> undef)
+
+  ; combine all results into a single mask (all ones)
+  %tmp_uno = and <4 x i32> %res_uge, %res_ule
+  %res_uno = and <4 x i32> %res_ueq, %tmp_uno
+  %tmp_ord = or <4 x i32> %res_ogt, %res_olt
+  %res_ord = or <4 x i32> %res_one, %tmp_ord
+  %res_ord_inv = xor <4 x i32> %res_ord, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = and <4 x i32> %res_uno, %res_ord_inv
+
+  ; compare result to -1,-1,-1,-1
+  %res_scalar = bitcast <4 x i32> %res to i128
+  %ret_cmp = icmp ne i128 %res_scalar, -1
+  %ret_val = zext i1 %ret_cmp to i32
+  ret i32 %ret_val
+}