[llvm] ba07f30 - [Hexagon] Don't build two halves of HVX vector in parallel

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 29 11:06:23 PST 2021


Author: Krzysztof Parzyszek
Date: 2021-12-29T11:00:01-08:00
New Revision: ba07f300c6d67a2c6dde8eef216b7a77ac4600bb

URL: https://github.com/llvm/llvm-project/commit/ba07f300c6d67a2c6dde8eef216b7a77ac4600bb
DIFF: https://github.com/llvm/llvm-project/commit/ba07f300c6d67a2c6dde8eef216b7a77ac4600bb.diff

LOG: [Hexagon] Don't build two halves of HVX vector in parallel

There can only be one permute operation per packet, so this actually
pessimizes the code (due to the extra "or").

Added: 
    

Modified: 
    llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
    llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 569ad8b337db..a151f3de170a 100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -659,10 +659,10 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
   // Find most common element to initialize vector with. This is to avoid
   // unnecessary vinsert/valign for cases where the same value is present
   // many times. Creates a histogram of the vector's elements to find the
-  // most common element n.
+  // most common element.
   assert(4*Words.size() == Subtarget.getVectorLength());
-  int VecHist[32];
-  int n = 0;
+  SmallVector<int,32> VecHist(32);
+  int MaxAt = 0;
   for (unsigned i = 0; i != NumWords; ++i) {
     VecHist[i] = 0;
     if (Words[i].isUndef())
@@ -671,60 +671,29 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
       if (Words[i] == Words[j])
         VecHist[i]++;
 
-    if (VecHist[i] > VecHist[n])
-      n = i;
+    if (VecHist[i] > VecHist[MaxAt])
+      MaxAt = i;
   }
 
-  SDValue HalfV = getZero(dl, VecTy, DAG);
-  if (VecHist[n] > 1) {
-    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
-    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
-                       {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
-  }
-  SDValue HalfV0 = HalfV;
-  SDValue HalfV1 = HalfV;
-
-  // Construct two halves in parallel, then or them together. Rn and Rm count
-  // number of rotations needed before the next element. One last rotation is
-  // performed post-loop to position the last element.
-  int Rn = 0, Rm = 0;
-  SDValue Sn, Sm;
-  SDValue N = HalfV0;
-  SDValue M = HalfV1;
-  for (unsigned i = 0; i != NumWords/2; ++i) {
-
+  // If each value is different, don't do splat, just insert them one by one.
+  bool NoSplat = VecHist[MaxAt] <= 1;
+  SDValue RotV = NoSplat
+                     ? DAG.getUNDEF(VecTy)
+                     : DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[MaxAt]);
+  int Rn = 0;
+  for (unsigned i = 0; i != NumWords; ++i) {
     // Rotate by element count since last insertion.
-    if (Words[i] != Words[n] || VecHist[n] <= 1) {
-      Sn = DAG.getConstant(Rn, dl, MVT::i32);
-      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
-      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
-                      {HalfV0, Words[i]});
+    if (NoSplat || Words[i] != Words[MaxAt]) {
+      RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy,
+                         {RotV, DAG.getConstant(Rn, dl, MVT::i32)});
+      RotV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, Words[i]});
       Rn = 0;
     }
-    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
-      Sm = DAG.getConstant(Rm, dl, MVT::i32);
-      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
-      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
-                      {HalfV1, Words[i+NumWords/2]});
-      Rm = 0;
-    }
     Rn += 4;
-    Rm += 4;
   }
   // Perform last rotation.
-  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
-  Sm = DAG.getConstant(Rm, dl, MVT::i32);
-  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
-  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
-
-  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
-  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
-
-  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
-
-  SDValue OutV =
-      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
-  return OutV;
+  return DAG.getNode(HexagonISD::VROR, dl, VecTy,
+                     {RotV, DAG.getConstant(Rn, dl, MVT::i32)});
 }
 
 SDValue

diff  --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
index e6b8445f5121..159001c11301 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-build-vector.ll
@@ -6,35 +6,31 @@ define <32 x i32> @fred(i32 %a0) #0 {
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r3:2 = combine(#20,#9)
-; CHECK-NEXT:     v0 = vxor(v0,v0)
-; CHECK-NEXT:     r1 = #24
-; CHECK-NEXT:     r4 = #12
+; CHECK-NEXT:     r3:2 = combine(#76,#7)
+; CHECK-NEXT:     r1 = #12
+; CHECK-NEXT:     r4 = #9
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1 = vror(v0,r1)
+; CHECK-NEXT:     v0 = vror(v0,r1)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1.w = vinsert(r2)
-; CHECK-NEXT:     r4 = #7
-; CHECK-NEXT:     r2 = #116
-; CHECK-NEXT:     v0 = vror(v0,r4)
+; CHECK-NEXT:     v0.w = vinsert(r2)
+; CHECK-NEXT:     r2 = #20
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0.w = vinsert(r4)
+; CHECK-NEXT:     v0 = vror(v0,r3)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1 = vror(v1,r3)
+; CHECK-NEXT:     v0.w = vinsert(r4)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1.w = vinsert(r0)
 ; CHECK-NEXT:     v0 = vror(v0,r2)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v1 = vror(v1,r3)
+; CHECK-NEXT:     v0.w = vinsert(r0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     v0 = vor(v0,v1)
+; CHECK-NEXT:     v0 = vror(v0,r2)
 ; CHECK-NEXT:     jumpr r31
 ; CHECK-NEXT:    }
   %v0 = insertelement <32 x i32> undef, i32 undef, i32 0


        


More information about the llvm-commits mailing list