[llvm] [SystemZ] Fix Operand Retrieval for Vector Reduction Intrinsic in `shouldExpandReduction` (PR #88874)

Wed Apr 17 02:33:26 PDT 2024

================
@@ -1323,44 +1323,40 @@ SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
-bool SystemZTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
-  // Always expand on Subtargets without vector instructions.
-  if (!ST->hasVector())
-    return true;
-
-  // Find the type of the vector operand of the intrinsic
-  // This assumes that each vector reduction intrinsic only
-  // has one vector operand.
-  FixedVectorType *VType = nullptr;
+// Find the type of the first vector operand of the intrinsic.
+// Returns nullptr in case no vector operand was found.
+FixedVectorType *getFirstOperandVectorType(const IntrinsicInst *II) {
   for (unsigned I = 0; I < II->getNumOperands(); ++I) {
     auto *T = II->getOperand(I)->getType();
     if (T->isVectorTy()) {
-      VType = cast<FixedVectorType>(T);
-      break;
+      return cast<FixedVectorType>(T);
     }
   }
+  return nullptr;
+}
 
-  // If we did not find a vector operand, do not continue.
-  if (VType == nullptr)
-    return true;
+// determine if the given vector type represents a full
+// machine vector register.
+bool isVectorFull(FixedVectorType *VType) {
+  unsigned MaxElts = SystemZ::VectorBits / VType->getScalarSizeInBits();
+  return VType->getNumElements() >= MaxElts;
+}
 
-  // If the vector operand is not a full vector, the reduction
-  // should be expanded.
-  unsigned NumElts = VType->getNumElements();
-  unsigned ScalarSize = VType->getScalarSizeInBits();
-  unsigned MaxElts = SystemZ::VectorBits / ScalarSize;
-  if (NumElts < MaxElts)
+bool SystemZTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
+  // Always expand on Subtargets without vector instructions.
+  if (!ST->hasVector())
     return true;
 
-  // Handling of full vector operands depends on the
-  // individual intrinsic.
+  // Whether or not to expand is a per-intrinsic decision.
   switch (II->getIntrinsicID()) {
   default:
     return true;
   // Do not expand vector.reduce.add...
   case Intrinsic::vector_reduce_add:
-    // ...unless the scalar size is i64 or larger, since the
-    // performance benefit is dubious there
-    return ScalarSize >= 64;
+    auto *VType = getFirstOperandVectorType(II);
+    // ...unless the scalar size is i64 or larger,
+    // or the operand vector is not full, since the
+    // performance benefit is dubious in those cases
+    return (VType->getScalarSizeInBits() >= 64) || not isVectorFull(VType);
   }
----------------
dominik-steenken wrote:

You're right, that would be easier. I was thinking in terms of element counts, but we don't really need that.

https://github.com/llvm/llvm-project/pull/88874