[llvm] add narrowExtractedVectorUnaryOp to simplify cast nodes (PR #87977)

Mon Apr 8 04:03:23 PDT 2024

================
@@ -24083,6 +24083,55 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
                      BinOp->getFlags());
 }
 
+/// If we are extracting a subvector produced by a wide unary operator try
+/// to use a narrow unary operator and/or avoid extraction.
+static SDValue narrowExtractedVectorUnaryOp(SDNode *Extract, SelectionDAG &DAG,
+                                          bool LegalOperations) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue UnaryOp = Extract->getOperand(0);
+  unsigned UnaryOpcode = UnaryOp.getOpcode();
+  
+  if (UnaryOpcode != ISD::FP_TO_SINT || UnaryOp->getNumValues() != 1)
+    return SDValue();
+
+  // The extract index must be a constant, so we can map it to a concat operand.
+  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+  if (!ExtractIndexC)
+    return SDValue();
+
+  EVT WideUVT = UnaryOp.getValueType();
+  if (!WideUVT.isFixedLengthVector())
+    return SDValue();
+  
+  EVT VT = Extract->getValueType(0);
+  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
+  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
+         "Extract index is not a multiple of the vector length.");
+
+  // Bail out if this is not a proper multiple width extraction.
+  unsigned WideWidth = WideUVT.getSizeInBits();
+  unsigned NarrowWidth = VT.getSizeInBits();
+  if (WideWidth % NarrowWidth != 0)
+    return SDValue();
+
+  unsigned NarrowingRatio = WideWidth / NarrowWidth;
+  unsigned WideNumElts = WideUVT.getVectorNumElements();
+
+  // Bail out if the target does not support a narrower version of the unaryop.
+  EVT NarrowUVT = EVT::getVectorVT(*DAG.getContext(), WideUVT.getScalarType(),
+                                   WideNumElts / NarrowingRatio);
+  if (!TLI.isOperationLegalOrCustomOrPromote(UnaryOpcode, NarrowUVT,
+                                             LegalOperations))
+    return SDValue();
+  
----------------
vedantparanjape-amd wrote:

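I made the change, but it now fails at instruction selection (isel). The transformation itself seems okay to me; below are the optimized type-legalized DAG, followed by the isel failure: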
```
Optimized type-legalized selection DAG: %bb.0 'fptosi_4f16_to_4i32:'
SelectionDAG has 8 nodes:
  t0: ch,glue = EntryToken
        t2: v8f16,ch = CopyFromReg t0, Register:v8f16 %1
      t10: v4f16 = extract_subvector t2, Constant:i64<0>
    t11: v4i32 = fp_to_sint t10
  t7: ch = CopyToReg t0, Register:v4i32 %2, t11
```

```
ISEL: Starting selection on root node: t16: v4f32 = X86ISD::VFPEXT t2
ISEL: Starting pattern match
  Initial Opcode index to 626311
  Match failed at index 626314
  Continuing at 626400
  Skipped scope entry (due to false predicate) at index 626404, continuing at 626435
  TypeSwitch[v4f32] from 626438 to 626441
  Match failed at index 626441
  Continuing at 626467
  Continuing at 626468
LLVM ERROR: Cannot select: t16: v4f32 = X86ISD::VFPEXT t2
  t2: v8f16,ch = CopyFromReg t0, Register:v8f16 %1
    t1: v8f16 = Register %1
In function: fptosi_4f16_to_4i32
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
```

https://github.com/llvm/llvm-project/pull/87977