[PATCH] [AArch64 NEON] Invalid constant is used in vselect condition.

Kevin Qin kevinqindev at gmail.com
Fri Jan 3 01:57:56 PST 2014


Hi,

In the AArch64 backend, if the condition of a vselect is a constant vector, the value of "true" is represented as (i8 1) instead of the expected (i8 255).
The reason is that vselect only accepts vxi1 as its condition operand, while BSL uses every bit of the condition to make the selection. So the legalizer inserts a sign_extend_inreg to promote vxi1 to vxi8. Next, DAGCombiner uses visitSIGN_EXTEND_INREG() to combine it with a build_vector. Because i8 is illegal in AArch64, i1 constants are promoted directly to i32, so the shifts are performed at 32 bits: a left shift by 7 followed by an arithmetic right shift by 7 can't correctly sign-extend i1 to i8 (for example, (1 << 7) >> 7 evaluated at 32 bits yields 1 rather than 0xff).


http://llvm-reviews.chandlerc.com/D2503

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/AArch64/neon-bitwise-instructions.ll

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5527,8 +5527,8 @@
       }
 
       ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
-      const APInt &C = CurrentND->getAPIntValue();
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt),
+      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                      Op.getValueType()));
     }
 
Index: test/CodeGen/AArch64/neon-bitwise-instructions.ll
===================================================================
--- test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -558,6 +558,63 @@
   ret <4 x i32> %4
 }
 
+define <8 x i8> @vselect_v8i8(<8 x i8> %a) {
+;CHECK:  movi	 {{d[0-9]+}}, #0xffff
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %b = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> <i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+  ret <8 x i8> %b
+}
+
+define <4 x i16> @vselect_v4i16(<4 x i16> %a) {
+;CHECK:  movi	 {{d[0-9]+}}, #0xffff
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i16> %a, <4 x i16> <i16 undef, i16 0, i16 0, i16 0>
+  ret <4 x i16> %b
+}
+
+define <8 x i8> @vselect_cmp_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+;CHECK:  cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT:  not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <8 x i8> %a, %b
+  %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
+  ret <8 x i8> %d
+}
+
+define <8 x i8> @vselect_cmp_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+;CHECK:  cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp eq <8 x i8> %a, %b
+  %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
+  ret <8 x i8> %d
+}
+
+define <8 x i8> @vselect_cmpz_ne(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+;CHECK:  cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+;CHECK-NEXT:  not {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp ne <8 x i8> %a, zeroinitializer
+  %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
+  ret <8 x i8> %d
+}
+
+define <8 x i8> @vselect_cmpz_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+;CHECK:  cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %cmp = icmp eq <8 x i8> %a, zeroinitializer
+  %d = select <8 x i1> %cmp, <8 x i8> %b, <8 x i8> %c
+  ret <8 x i8> %d
+}
+
+define <8 x i8> @vselect_tst(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
+;CHECK:  cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+;CHECK-NEXT:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+	%tmp3 = and <8 x i8> %a, %b
+	%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+  %d = select <8 x i1> %tmp4, <8 x i8> %b, <8 x i8> %c
+  ret <8 x i8> %d
+}
+
 define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
 ;CHECK:  bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
   %1 = and <2 x i64> %v1, %v2
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2503.1.patch
Type: text/x-patch
Size: 3473 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140103/fb91f30a/attachment.bin>


More information about the llvm-commits mailing list