[llvm] Port NVPTXTargetLowering::LowerCONCAT_VECTORS to llvm/lib/CodeGen/SelectionDAG (PR #120030)

Sun Dec 15 18:02:14 PST 2024

https://github.com/Esan5 created https://github.com/llvm/llvm-project/pull/120030

Ports `NVPTXTargetLowering::LowerCONCAT_VECTORS` to `llvm/lib/CodeGen/SelectionDAG` as requested in https://github.com/llvm/llvm-project/issues/116695.

>From f4ccd5d55ee8731e55a3e4529fb6755eba5556c8 Mon Sep 17 00:00:00 2001
From: Ethan Kaji <ethan.kaji at gmail.com>
Date: Sun, 15 Dec 2024 19:31:53 -0600
Subject: [PATCH] first try

---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 26 ++++++++++++++++---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   | 24 -----------------
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |  1 -
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index ca87168929f964..09025ae4d71ab9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -191,6 +191,7 @@ class SelectionDAGLegalize {
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
   SDValue ExpandInsertToVectorThroughStack(SDValue Op);
   SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+  SDValue ExpandConcatVectors(SDNode* Node);
 
   SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
   SDValue ExpandConstant(ConstantSDNode *CP);
@@ -1517,10 +1518,27 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
                      BaseVecAlignment);
 }
 
+SDValue SelectionDAGLegalize::ExpandConcatVectors(SDNode *Node) {
+  assert(Node->getOpcode() == ISD::CONCAT_VECTORS && "Unexpected opcode!");
+  SDLoc Dl(Node);
+  SmallVector<SDValue, 0> Ops;
+  unsigned NumOperands = Node->getNumOperands();
+  for (unsigned I = 0; I < NumOperands; ++I) {
+    SDValue SubOp = Node->getOperand(I);
+    EVT VectorValueType =
+        SubOp->getValueType(0);
+    EVT ElementValueType = VectorValueType.getVectorElementType();
+    unsigned NumSubElem = VectorValueType.getVectorNumElements();
+    for (unsigned J = 0; J < NumSubElem; ++J) {
+      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ElementValueType,
+                                SubOp, DAG.getIntPtrConstant(J, Dl)));
+    }
+  }
+  return DAG.getBuildVector(Node->getValueType(0), Dl, Ops);
+}
+
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
-  assert((Node->getOpcode() == ISD::BUILD_VECTOR ||
-          Node->getOpcode() == ISD::CONCAT_VECTORS) &&
-         "Unexpected opcode!");
+  assert(Node->getOpcode() == ISD::BUILD_VECTOR && "Unexpected opcode!");
 
   // We can't handle this case efficiently.  Allocate a sufficiently
   // aligned object on the stack, store each operand into it, then load
@@ -3371,7 +3389,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
     break;
   case ISD::CONCAT_VECTORS:
-    Results.push_back(ExpandVectorBuildThroughStack(Node));
+    Results.push_back(ExpandConcatVectors(Node));
     break;
   case ISD::SCALAR_TO_VECTOR:
     Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ce94dded815b8f..0b6d631154ea36 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2356,28 +2356,6 @@ SDValue NVPTXTargetLowering::LowerSTACKSAVE(SDValue Op,
   return DAG.getMergeValues({ASC, SDValue(SS.getNode(), 1)}, DL);
 }
 
-// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
-// (see LegalizeDAG.cpp). This is slow and uses local memory.
-// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
-SDValue
-NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
-  SDNode *Node = Op.getNode();
-  SDLoc dl(Node);
-  SmallVector<SDValue, 8> Ops;
-  unsigned NumOperands = Node->getNumOperands();
-  for (unsigned i = 0; i < NumOperands; ++i) {
-    SDValue SubOp = Node->getOperand(i);
-    EVT VVT = SubOp.getNode()->getValueType(0);
-    EVT EltVT = VVT.getVectorElementType();
-    unsigned NumSubElem = VVT.getVectorNumElements();
-    for (unsigned j = 0; j < NumSubElem; ++j) {
-      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
-                                DAG.getIntPtrConstant(j, dl)));
-    }
-  }
-  return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
-}
-
 SDValue NVPTXTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
   // Handle bitcasting from v2i8 without hitting the default promotion
   // strategy which goes through stack memory.
@@ -2920,8 +2898,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return LowerVECTOR_SHUFFLE(Op, DAG);
-  case ISD::CONCAT_VECTORS:
-    return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::STORE:
     return LowerSTORE(Op, DAG);
   case ISD::LOAD:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index c8b589ae39413e..06022703d51a32 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -623,7 +623,6 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;