[PATCH] ARMEB: Fix trunc DAG combining for vector types

Tue Jun 24 06:32:54 PDT 2014

Hi all,

The DAG combiner for trunc operations performs a specific folding of buildvector, bitcast and truncate operations into a single buildvector operation. The current implementation is intended for little endian mode only and does not take the big endian order of data into account.

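This patch fixes the element selection for the resulting buildvector so that each truncated element is taken from the operand that holds its least significant data: in big endian mode this is the last operand (the "higher side") of each group of buildvector operands, rather than the first one as in little endian mode.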

Although this patch is issued for the ARM BE target, it affects all big endian targets. 

Please review, thanks,
Christian

http://reviews.llvm.org/D4274

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/ARM/big-endian-neon-trunc.ll

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================

--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6028,13 +6028,14 @@
       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
       unsigned TruncVecNumElts = VT.getVectorNumElements();
       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+      unsigned TruncEltBaseOffset = isLE ? 0 : (TruncEltOffset-1);
 
       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
              "Invalid number of elements");
 
       SmallVector<SDValue, 8> Opnds;
       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
-        Opnds.push_back(BuildVect.getOperand(i));
+        Opnds.push_back(BuildVect.getOperand(i+TruncEltBaseOffset));
 
       return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
     }
Index: test/CodeGen/ARM/big-endian-neon-trunc.ll
===================================================================
--- test/CodeGen/ARM/big-endian-neon-trunc.ll
+++ test/CodeGen/ARM/big-endian-neon-trunc.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -mtriple armeb-eabi -mattr v7,neon -o - | FileCheck %s
+
+; Check the folding of buildvector, bitcast, and truncate in big endian mode.
+; For example, fold
+; (v2i32 trunc (bitcast ((v4i32) buildvector a, b, c, d ) v2i64)) to
+; (v2i32 (buildvector b, d))
+
+define void @buildvec_bitcast_trunc_2i64_to_2i32( <2 x i64>* %loadaddr, <2 x i32>* %storeaddr ) {
+; CHECK-LABEL: buildvec_bitcast_trunc_2i64_to_2i32:
+; CHECK:       mul [[REG1:r[0-9]+]],
+; CHECK:       mul [[REG2:r[0-9]+]],
+; CHECK:       vmov.32 [[REG3:d[0-9]+]][0], [[REG1]]
+; CHECK:       vmov.32 [[REG3]][1], [[REG2]]
+; CHECK:       vstr [[REG3]]
+  %1 = load <2 x i64>* %loadaddr
+  %2 = mul <2 x i64> %1, %1
+  %3 = trunc <2 x i64> %2 to <2 x i32>
+  store <2 x i32> %3, <2 x i32>* %storeaddr
+  ret void
+}
+
+define void @buildvec_bitcast_trunc_2i64_to_2i16( <2 x i64>* %loadaddr, <2 x i16>* %storeaddr ) {
+; CHECK-LABEL: buildvec_bitcast_trunc_2i64_to_2i16:
+; CHECK:       mul [[REG1:r[0-9]+]],
+; CHECK:       mul [[REG2:r[0-9]+]],
+; CHECK:       vmov.32 [[REG3:d[0-9]+]][0], [[REG1]]
+; CHECK:       vmov.32 [[REG3]][1], [[REG2]]
+; CHECK:       vuzp.16 [[REG3]]
+  %1 = load <2 x i64>* %loadaddr
+  %2 = mul <2 x i64> %1, %1
+  %3 = trunc <2 x i64> %2 to <2 x i16>
+  store <2 x i16> %3, <2 x i16>* %storeaddr
+  ret void
+}
+
+define void @buildvec_bitcast_trunc_2i64_to_2i8( <2 x i64>* %loadaddr, <2 x i8>* %storeaddr ) {
+; CHECK-LABEL: buildvec_bitcast_trunc_2i64_to_2i8:
+; CHECK:       mul [[REG1:r[0-9]+]],
+; CHECK:       mul [[REG2:r[0-9]+]],
+; CHECK:       vmov.32 [[REG3:d[0-9]+]][0], [[REG1]]
+; CHECK:       vmov.32 [[REG3]][1], [[REG2]]
+; CHECK:       vmov.32 [[REG4:r[0-9]+]], [[REG3]][1]
+; CHECK:       strb [[REG4]]
+; CHECK:       vmov.32 [[REG5:r[0-9]+]], [[REG3]][0]
+; CHECK:       strb [[REG5]]
+  %1 = load <2 x i64>* %loadaddr
+  %2 = mul <2 x i64> %1, %1
+  %3 = trunc <2 x i64> %2 to <2 x i8>
+  store <2 x i8> %3, <2 x i8>* %storeaddr
+  ret void
+}
+
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4274.10786.patch
Type: text/x-patch
Size: 3095 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140624/c98c2cfa/attachment.bin>