[llvm-commits] [PATCH] Fix bug in SExt optimizations

James Benton jbenton at vmware.com
Thu Jun 21 05:13:05 PDT 2012


Two diffs: one for LLVM, which fixes the bug, and one for the nightly
test-suite, which adds a test for the bug.

This patch was emailed previously but has not yet been committed; see "Re: [llvm-commits]
[PATCH] Fix sext(setcc(N0 cc N0)) for vectorpfs".




-------------- next part --------------
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp	(revision 158398)
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp	(working copy)
@@ -2389,21 +2389,29 @@
   }
 
   if (N0 == N1) {
+    // The sext(setcc()) => setcc() optimization relies on the appropriate
+    // constant being emitted.
+    uint64_t EqVal;
+    switch (getBooleanContents(N0.getValueType().isVector())) {
+    default: llvm_unreachable("Unknown boolean contents!");
+    case UndefinedBooleanContent:
+    case ZeroOrOneBooleanContent:
+      EqVal = ISD::isTrueWhenEqual(Cond);
+      break;
+    case ZeroOrNegativeOneBooleanContent:
+      EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+      break;
+    }
+
     // We can always fold X == X for integer setcc's.
     if (N0.getValueType().isInteger()) {
-      switch (getBooleanContents(N0.getValueType().isVector())) {
-      case UndefinedBooleanContent: 
-      case ZeroOrOneBooleanContent: 
-        return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
-      case ZeroOrNegativeOneBooleanContent:
-        return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
-      }
+      return DAG.getConstant(EqVal, VT);
     }
     unsigned UOF = ISD::getUnorderedFlavor(Cond);
     if (UOF == 2)   // FP operators that are undefined on NaNs.
-      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+      return DAG.getConstant(EqVal, VT);
     if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
-      return DAG.getConstant(UOF, VT);
+      return DAG.getConstant(EqVal, VT);
     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
     // if it is not already.
     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;

-------------- next part --------------
Index: LLVMSource/sext-setcc-self.ll
===================================================================
--- LLVMSource/sext-setcc-self.ll	(revision 0)
+++ LLVMSource/sext-setcc-self.ll	(revision 0)
@@ -0,0 +1,67 @@
+define <4 x i32> @test_ueq(<4 x float> %val) {
+entry:
+  %cmp = fcmp ueq <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_uge(<4 x float> %val) {
+entry:
+  %cmp = fcmp uge <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_ule(<4 x float> %val) {
+entry:
+  %cmp = fcmp ule <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_one(<4 x float> %val) {
+entry:
+  %cmp = fcmp one <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_ogt(<4 x float> %val) {
+entry:
+  %cmp = fcmp ogt <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_olt(<4 x float> %val) {
+entry:
+  %cmp = fcmp olt <4 x float> %val, %val
+  %mask = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %mask
+}
+
+define i32 @main() nounwind {
+  ; test unordered equality comparisons (all ones)
+  %res_ueq = call <4 x i32> @test_ueq(<4 x float> undef)
+  %res_uge = call <4 x i32> @test_uge(<4 x float> undef)
+  %res_ule = call <4 x i32> @test_ule(<4 x float> undef)
+
+  ; test ordered inequality comparisons (all zeros)
+  %res_one = call <4 x i32> @test_one(<4 x float> undef)
+  %res_ogt = call <4 x i32> @test_ogt(<4 x float> undef)
+  %res_olt = call <4 x i32> @test_olt(<4 x float> undef)
+
+  ; combine all results into a single mask (all ones)
+  %tmp_uno = and <4 x i32> %res_uge, %res_ule
+  %res_uno = and <4 x i32> %res_ueq, %tmp_uno
+  %tmp_ord = or <4 x i32> %res_ogt, %res_olt
+  %res_ord = or <4 x i32> %res_one, %tmp_ord
+  %res_ord_inv = xor <4 x i32> %res_ord, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = and <4 x i32> %res_uno, %res_ord_inv
+
+  ; compare result to -1,-1,-1,-1
+  %res_scalar = bitcast <4 x i32> %res to i128
+  %ret_cmp = icmp ne i128 %res_scalar, -1
+  %ret_val = zext i1 %ret_cmp to i32
+  ret i32 %ret_val
+}



More information about the llvm-commits mailing list