[llvm] [PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types. (PR #80784)

Mon Nov 4 12:09:53 PST 2024

================
@@ -15686,6 +15690,50 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
                      OrigSToV.getOperand(0));
 }
 
+static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
+                                 int HalfVec, int LHSLastElementDefined,
+                                 int RHSLastElementDefined) {
+  for (int I : seq<int>(0, ShuffV.size())) {
+    int Index = ShuffV[I];
+    if (Index < 0) // Skip explicitly undefined mask indices.
+      continue;
+    // Handle first input vector of the vector_shuffle.
+    if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
+        (Index > LHSLastElementDefined))
+      return false;
+    // Handle second input vector of the vector_shuffle.
+    if ((RHSLastElementDefined >= 0) &&
+        (Index > HalfVec + RHSLastElementDefined))
+      return false;
+  }
+  return true;
+}
+
+static SDValue generateSToVPermutedForVecShuffle(
+    int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
+    int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
+    SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
+  EVT VecShuffOperandType = VecShuffOperand.getValueType();
+  // Set up the values for the shuffle vector fixup.
+  NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
+  // The last element depends on if the input comes from the LHS or RHS.
+  //
+  // For example:
+  // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
+  //
+  // For the LHS: The last element that comes from the LHS is actually 0, not 3
+  // because elements 1 and higher of a scalar_to_vector are undefined.
+  // For the RHS: The last element that comes from the RHS is actually 5, not 7
+  // because elements 1 and higher of a scalar_to_vector are undefined.
+  // It is also not 4 because the original scalar_to_vector is wider and
+  // actually contains two i32 elements.
+  LastElt = ScalarSize / (ShuffleEltWidth + 1) + FirstElt;
----------------
amy-kwan wrote:

Digger and I briefly discussed this and the suggestion should instead be:
```
LastElt = ScalarSize >  ShuffleEltWidth ? ScalarSize / ShuffleEltWidth -1 + FirstElt : FirstElt
```
I will test this and report back.

https://github.com/llvm/llvm-project/pull/80784