[llvm-commits] [PATCH] Fix sext(setcc()) => setcc()
James Benton
jbenton at vmware.com
Tue Jun 26 04:35:10 PDT 2012
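When folding a floating-point setcc of a value with itself, we were emitting 0/1 booleans instead of 0/-1 booleans on targets whose boolean contents are zero-or-negative-one.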
Moved test to "make check" regression tests as requested.
I have also attached a version of the test for the nightly test suite in case
you want it; otherwise, the bugfix plus the regression test is sufficient.
Please commit.
-------------- next part --------------
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp (revision 159148)
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp (working copy)
@@ -2389,21 +2389,29 @@
}
if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ default: llvm_unreachable("Unknown boolean contents!");
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
// We can always fold X == X for integer setcc's.
if (N0.getValueType().isInteger()) {
- switch (getBooleanContents(N0.getValueType().isVector())) {
- case UndefinedBooleanContent:
- case ZeroOrOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
- case ZeroOrNegativeOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
- }
+ return DAG.getConstant(EqVal, VT);
}
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ return DAG.getConstant(EqVal, VT);
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(UOF, VT);
+ return DAG.getConstant(EqVal, VT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
Index: test/CodeGen/X86/sext-setcc-self.ll
===================================================================
--- test/CodeGen/X86/sext-setcc-self.ll (revision 0)
+++ test/CodeGen/X86/sext-setcc-self.ll (revision 0)
@@ -0,0 +1,55 @@
+; RUN: llc -march=x86 -mcpu=nehalem < %s | FileCheck %s
+
+define <4 x i32> @test_ueq(<4 x float> %in) {
+entry:
+ ; CHECK: pcmpeqd %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp ueq <4 x float> %in, %in
+ %1 = sext <4 x i1> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_uge(<4 x float> %in) {
+entry:
+ ; CHECK: pcmpeqd %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp uge <4 x float> %in, %in
+ %1 = sext <4 x i1> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ule(<4 x float> %in) {
+entry:
+ ; CHECK: pcmpeqd %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp ule <4 x float> %in, %in
+ %1 = sext <4 x i1> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_one(<4 x float> %in) {
+entry:
+ ; CHECK: xorps %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp one <4 x float> %in, %in
+ %1 = sext <4 x i1> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_ogt(<4 x float> %in) {
+entry:
+ ; CHECK: xorps %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp ogt <4 x float> %in, %in
+ %1 = sext <4 x i1> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @test_olt(<4 x float> %in) {
+entry:
+ ; CHECK: xorps %xmm0, %xmm0
+ ; CHECK-NEXT: ret
+ %0 = fcmp olt <4 x float> %in, %in
+ %1 = sext <4 x i1> %0 to <4 x i32>
+ ret <4 x i32> %1
+}
-------------- next part --------------
Index: LLVMSource/sext-setcc-self.ll
===================================================================
--- LLVMSource/sext-setcc-self.ll (revision 0)
+++ LLVMSource/sext-setcc-self.ll (revision 0)
@@ -0,0 +1,67 @@
+define <4 x i32> @test_ueq(<4 x float> %val) {
+entry:
+ %cmp = fcmp ueq <4 x float> %val, %val
+ %mask = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_uge(<4 x float> %val) {
+entry:
+ %cmp = fcmp uge <4 x float> %val, %val
+ %mask = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_ule(<4 x float> %val) {
+entry:
+ %cmp = fcmp ule <4 x float> %val, %val
+ %mask = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_one(<4 x float> %val) {
+entry:
+ %cmp = fcmp one <4 x float> %val, %val
+ %mask = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_ogt(<4 x float> %val) {
+entry:
+ %cmp = fcmp ogt <4 x float> %val, %val
+ %mask = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %mask
+}
+
+define <4 x i32> @test_olt(<4 x float> %val) {
+entry:
+ %cmp = fcmp olt <4 x float> %val, %val
+ %mask = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %mask
+}
+
+define i32 @main() nounwind {
+ ; test unordered equality comparisons (all ones)
+ %res_ueq = call <4 x i32> @test_ueq(<4 x float> undef)
+ %res_uge = call <4 x i32> @test_uge(<4 x float> undef)
+ %res_ule = call <4 x i32> @test_ule(<4 x float> undef)
+
+ ; test ordered inequality comparisons (all zeros)
+ %res_one = call <4 x i32> @test_one(<4 x float> undef)
+ %res_ogt = call <4 x i32> @test_ogt(<4 x float> undef)
+ %res_olt = call <4 x i32> @test_olt(<4 x float> undef)
+
+ ; combine all results into a single mask (all ones)
+ %tmp_uno = and <4 x i32> %res_uge, %res_ule
+ %res_uno = and <4 x i32> %res_ueq, %tmp_uno
+ %tmp_ord = or <4 x i32> %res_ogt, %res_olt
+ %res_ord = or <4 x i32> %res_one, %tmp_ord
+ %res_ord_inv = xor <4 x i32> %res_ord, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = and <4 x i32> %res_uno, %res_ord_inv
+
+ ; compare result to -1,-1,-1,-1
+ %res_scalar = bitcast <4 x i32> %res to i128
+ %ret_cmp = icmp ne i128 %res_scalar, -1
+ %ret_val = zext i1 %ret_cmp to i32
+ ret i32 %ret_val
+}
More information about the llvm-commits
mailing list