[llvm] 95c7dd8 - Revert "[Hexagon] Don't build two halves of HVX vector in parallel"

Thu Dec 30 08:13:38 PST 2021

Author: Krzysztof Parzyszek
Date: 2021-12-30T07:57:11-08:00
New Revision: 95c7dd8810b0bc93c0f76a285f1bcc3bd73f6a50

URL: https://github.com/llvm/llvm-project/commit/95c7dd8810b0bc93c0f76a285f1bcc3bd73f6a50
DIFF: https://github.com/llvm/llvm-project/commit/95c7dd8810b0bc93c0f76a285f1bcc3bd73f6a50.diff

LOG: Revert "[Hexagon] Don't build two halves of HVX vector in parallel"

This reverts commit ba07f300c6d67a2c6dde8eef216b7a77ac4600bb.

A build-vector sequence is made of pairs: rotate+insert. When constructing
a single vector, this results in a chain of 2*N instructions. The rotate
operation is a permute operation, but the insert uses a multiplication
resource: insert and rotate can execute in the same cycle, but obviously
they cannot operate on the same vector. The original halving idea is still
beneficial since it does allow for insert/rotate overlap, and for hiding
insert's latency.

Added: 
    

Modified: 
    llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
    llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index a151f3de170a..569ad8b337db 100755

--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -659,10 +659,10 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
   // Find most common element to initialize vector with. This is to avoid
   // unnecessary vinsert/valign for cases where the same value is present
   // many times. Creates a histogram of the vector's elements to find the
-  // most common element.
+  // most common element n.
   assert(4*Words.size() == Subtarget.getVectorLength());
-  SmallVector<int,32> VecHist(32);
-  int MaxAt = 0;
+  int VecHist[32];
+  int n = 0;
   for (unsigned i = 0; i != NumWords; ++i) {
     VecHist[i] = 0;
     if (Words[i].isUndef())
@@ -671,29 +671,60 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
       if (Words[i] == Words[j])
         VecHist[i]++;
 
-    if (VecHist[i] > VecHist[MaxAt])
-      MaxAt = i;
+    if (VecHist[i] > VecHist[n])
+      n = i;
   }
 
-  // If each value is different, don't do splat, just insert them one by one.
-  bool NoSplat = VecHist[MaxAt] <= 1;
-  SDValue RotV = NoSplat
-                     ? DAG.getUNDEF(VecTy)
-                     : DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[MaxAt]);
-  int Rn = 0;
-  for (unsigned i = 0; i != NumWords; ++i) {
+  SDValue HalfV = getZero(dl, VecTy, DAG);
+  if (VecHist[n] > 1) {
+    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
+    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
+                       {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
+  }
+  SDValue HalfV0 = HalfV;
+  SDValue HalfV1 = HalfV;
+
+  // Construct two halves in parallel, then or them together. Rn and Rm count
+  // number of rotations needed before the next element. One last rotation is
+  // performed post-loop to position the last element.
+  int Rn = 0, Rm = 0;
+  SDValue Sn, Sm;
+  SDValue N = HalfV0;
+  SDValue M = HalfV1;
+  for (unsigned i = 0; i != NumWords/2; ++i) {
+
     // Rotate by element count since last insertion.
-    if (NoSplat || Words[i] != Words[MaxAt]) {
-      RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy,
-                         {RotV, DAG.getConstant(Rn, dl, MVT::i32)});
-      RotV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, Words[i]});
+    if (Words[i] != Words[n] || VecHist[n] <= 1) {
+      Sn = DAG.getConstant(Rn, dl, MVT::i32);
+      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
+      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
+                      {HalfV0, Words[i]});
       Rn = 0;
     }
+    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
+      Sm = DAG.getConstant(Rm, dl, MVT::i32);
+      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
+      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
+                      {HalfV1, Words[i+NumWords/2]});
+      Rm = 0;
+    }
     Rn += 4;
+    Rm += 4;
   }
   // Perform last rotation.
-  return DAG.getNode(HexagonISD::VROR, dl, VecTy,
-                     {RotV, DAG.getConstant(Rn, dl, MVT::i32)});
+  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
+  Sm = DAG.getConstant(Rm, dl, MVT::i32);
+  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
+  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
+
+  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
+  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
+
+  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
+
+  SDValue OutV =
+      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
+  return OutV;
 }
 
 SDValue

diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
index 159001c11301..e6b8445f5121 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
@@ -6,31 +6,35 @@ define <32 x i32> @fred(i32 %a0) #0 {
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r3:2 = combine(#76,#7)
-; CHECK-NEXT:     r1 = #12
-; CHECK-NEXT:     r4 = #9
+; CHECK-NEXT:     r3:2 = combine(#20,#9)
+; CHECK-NEXT:     v0 = vxor(v0,v0)
+; CHECK-NEXT:     r1 = #24
+; CHECK-NEXT:     r4 = #12
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0 = vror(v0,r1)
+; CHECK-NEXT:     v1 = vror(v0,r1)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0.w = vinsert(r2)
-; CHECK-NEXT:     r2 = #20
+; CHECK-NEXT:     v1.w = vinsert(r2)
+; CHECK-NEXT:     r4 = #7
+; CHECK-NEXT:     r2 = #116
+; CHECK-NEXT:     v0 = vror(v0,r4)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0 = vror(v0,r3)
+; CHECK-NEXT:     v0.w = vinsert(r4)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0.w = vinsert(r4)
+; CHECK-NEXT:     v1 = vror(v1,r3)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
+; CHECK-NEXT:     v1.w = vinsert(r0)
 ; CHECK-NEXT:     v0 = vror(v0,r2)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0.w = vinsert(r0)
+; CHECK-NEXT:     v1 = vror(v1,r3)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0 = vror(v0,r2)
+; CHECK-NEXT:     v0 = vor(v0,v1)
 ; CHECK-NEXT:     jumpr r31
 ; CHECK-NEXT:    }
   %v0 = insertelement <32 x i32> undef, i32 undef, i32 0