[llvm] [AArch64][SME] Enable dynamic shuffle for fixed length types. (PR #72490)
    Sander de Smalen via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue Feb 13 03:35:40 PST 2024
    
    
  
================
@@ -26123,26 +26123,50 @@ static SDValue GenerateFixedLengthSVETBL(SDValue Op, SDValue Op1, SDValue Op2,
 
   // Ignore two operands if no SVE2 or all index numbers couldn't
   // be represented.
-  if (!IsSingleOp && (!Subtarget.hasSVE2() || MinSVESize != MaxSVESize))
+  if (!IsSingleOp && !Subtarget.hasSVE2())
     return SDValue();
 
   EVT VTOp1 = Op.getOperand(0).getValueType();
   unsigned BitsPerElt = VTOp1.getVectorElementType().getSizeInBits();
   unsigned IndexLen = MinSVESize / BitsPerElt;
   unsigned ElementsPerVectorReg = VTOp1.getVectorNumElements();
   uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue();
+  EVT MaskEltType = VTOp1.getVectorElementType().changeTypeToInteger();
+  EVT MaskType = EVT::getVectorVT(*DAG.getContext(), MaskEltType, IndexLen);
+  bool MinMaxEqual = (MinSVESize == MaxSVESize);
   assert(ElementsPerVectorReg <= IndexLen && ShuffleMask.size() <= IndexLen &&
          "Incorrectly legalised shuffle operation");
 
   SmallVector<SDValue, 8> TBLMask;
+  // If MinSVESize is not equal to MaxSVESize then we need to know which
+  // TBL mask element needs adjustment.
+  SmallVector<SDValue, 8> MulByVLMask;
+
+  // Bail out for 8-bits element types, because with 2048-bit SVE register
+  // size 8 bits is only sufficient to index into the first source vector.
+  if (!IsSingleOp && !MinMaxEqual && BitsPerElt == 8)
+    return SDValue();
+
   for (int Index : ShuffleMask) {
     // Handling poison index value.
     if (Index < 0)
       Index = 0;
     // If we refer to the second operand then we have to add elements
-    // number in hardware register minus number of elements in a type.
-    if ((unsigned)Index >= ElementsPerVectorReg)
-      Index += IndexLen - ElementsPerVectorReg;
+    // number in hardware register minus number of elements in a type in
+    // case if MinSVESize equals to MaxSVESize, otherwise just add normalized
+    // value and record this element in MulByVLMask to be adjusted in the
+    // runtime.
----------------
sdesmalen-arm wrote:
I would phrase this comment as:
> If the mask refers to elements in the second operand, then we have to offset the index by the number of elements in a vector. If this is number is not known at compile-time, we need to maintain a mask with 'VL' values to add at runtime.
https://github.com/llvm/llvm-project/pull/72490
    
    
More information about the llvm-commits
mailing list