[llvm-branch-commits] [llvm] [Hexagon] Avoid contracting predicates in createHvxPrefixPred (#183081) (PR #184735)

Thu Mar 5 08:25:17 PST 2026

https://github.com/dyung updated https://github.com/llvm/llvm-project/pull/184735

>From e2405245fc6d93ae0a7324be45e23903899b552d Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 24 Feb 2026 11:24:16 -0600
Subject: [PATCH] [Hexagon] Avoid contracting predicates in createHvxPrefixPred
 (#183081)

The function createHvxPrefixPred should only need to expand a predicate
to match the result's bytes-per-bit. Otherwise, contracting of the
predicate may lead to an input that is shorter than 4 bytes, making it
unsuitable for VINSERTW0.

When calling createHvxPrefixPred for vector concatenation, re-group the
inputs to the concat to make sure that the resulting inputs to
createHvxPrefixPred would not need contraction.

Fixes https://github.com/llvm/llvm-project/issues/181362
---
 llvm/lib/Target/Hexagon/HexagonISelLowering.h |  4 +
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 88 ++++++++++++-------
 .../autohvx/isel-hvx-rescale-predicate.ll     | 14 +++
 3 files changed, 76 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-rescale-predicate.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3ef43ae7ad838..f42840cd284d7 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -515,6 +515,10 @@ class HexagonTargetLowering : public TargetLowering {
                              SelectionDAG &DAG) const;
 
   SDValue combineTruncateBeforeLegal(SDValue Op, DAGCombinerInfo &DCI) const;
+
+  SDValue combineConcatOfShuffles(SDValue Op, SelectionDAG &DAG) const;
+  SDValue combineConcatOfScalarPreds(SDValue Op, unsigned BitBytes,
+                                     SelectionDAG &DAG) const;
   SDValue combineConcatVectorsBeforeLegal(SDValue Op, DAGCombinerInfo & DCI)
       const;
   SDValue combineVectorShuffleBeforeLegal(SDValue Op, DAGCombinerInfo & DCI)
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index cbe1498bb24a9..ea0ad78c70bc6 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1106,11 +1106,8 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
   SDValue W0 = isUndef(PredV)
                   ? DAG.getUNDEF(MVT::i64)
                   : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
-  if (Bytes < BitBytes) {
-    Words[IdxW].push_back(HiHalf(W0, DAG));
-    Words[IdxW].push_back(LoHalf(W0, DAG));
-  } else
-    Words[IdxW].push_back(W0);
+  Words[IdxW].push_back(HiHalf(W0, DAG));
+  Words[IdxW].push_back(LoHalf(W0, DAG));
 
   while (Bytes < BitBytes) {
     IdxW ^= 1;
@@ -1131,27 +1128,7 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
     Bytes *= 2;
   }
 
-  while (Bytes > BitBytes) {
-    IdxW ^= 1;
-    Words[IdxW].clear();
-
-    if (Bytes <= 4) {
-      for (const SDValue &W : Words[IdxW ^ 1]) {
-        SDValue T = contractPredicate(W, dl, DAG);
-        Words[IdxW].push_back(T);
-      }
-    } else {
-      for (const SDValue &W : Words[IdxW ^ 1]) {
-        Words[IdxW].push_back(W);
-      }
-    }
-    Bytes /= 2;
-  }
-
   assert(Bytes == BitBytes);
-  if (BitBytes == 1 && PredTy == MVT::v2i1)
-    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);
-
   SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
   SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
   for (const SDValue &W : Words[IdxW]) {
@@ -1835,13 +1812,17 @@ HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
   // corresponds to.
   unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
 
+  // Make sure that createHvxPrefixPred will only ever need to expand
+  // the predicate, i.e. bytes-per-bit in the input is not greater than
+  // the target bytes-per-bit in the result.
+  SDValue Combined = combineConcatOfScalarPreds(Op, BitBytes, DAG);
   SmallVector<SDValue,8> Prefixes;
-  for (SDValue V : Op.getNode()->op_values()) {
+  for (SDValue V : Combined.getNode()->op_values()) {
     SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
     Prefixes.push_back(P);
   }
 
-  unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
+  unsigned InpLen = ty(Combined.getOperand(0)).getVectorNumElements();
   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
   SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
   SDValue Res = getZero(dl, ByteTy, DAG);
@@ -3834,8 +3815,8 @@ HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
 }
 
 SDValue
-HexagonTargetLowering::combineConcatVectorsBeforeLegal(
-    SDValue Op, DAGCombinerInfo &DCI) const {
+HexagonTargetLowering::combineConcatOfShuffles(SDValue Op,
+                                               SelectionDAG &DAG) const {
   // Fold
   //   concat (shuffle x, y, m1), (shuffle x, y, m2)
   // into
@@ -3843,7 +3824,6 @@ HexagonTargetLowering::combineConcatVectorsBeforeLegal(
   if (Op.getNumOperands() != 2)
     return SDValue();
 
-  SelectionDAG &DAG = DCI.DAG;
   const SDLoc &dl(Op);
   SDValue V0 = Op.getOperand(0);
   SDValue V1 = Op.getOperand(1);
@@ -3899,6 +3879,54 @@ HexagonTargetLowering::combineConcatVectorsBeforeLegal(
   return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
 }
 
+// Reassociate concat(p1, p2, ...) into
+//   concat(concat(p1, ...), concat(pi, ...), ...)
+// so that each bit of every inner concat corresponds to at most BitBytes
+// bytes (concatenating predicates decreases the bytes per predicate bit).
+// All inputs are assumed to have the type of the first operand. Op itself
+// is returned when its inputs already satisfy that bound.
+SDValue
+HexagonTargetLowering::combineConcatOfScalarPreds(SDValue Op, unsigned BitBytes,
+                                                  SelectionDAG &DAG) const {
+  const SDLoc &dl(Op);
+  SmallVector<SDValue> Ops(Op->ops());
+  MVT ResTy = ty(Op);
+  MVT InpTy = ty(Ops[0]);
+  unsigned InpLen = InpTy.getVectorNumElements(); // Scalar predicate
+  unsigned ResLen = ResTy.getVectorNumElements(); // HVX vector predicate
+  assert(InpLen <= 8 && "Too long for scalar predicate");
+  assert(ResLen > 8 && "Too short for HVX vector predicate");
+
+  unsigned Bytes = 8 / InpLen; // Bytes-per-bit in input
+
+  // Already in the right form?
+  if (Bytes <= BitBytes)
+    return Op;
+
+  ArrayRef<SDValue> Inputs(Ops);
+  unsigned SliceLen = Bytes / BitBytes; // Inputs per inner concat
+  // Each inner concat has 8 / BitBytes bits: v8i1 only when BitBytes == 1.
+  unsigned CatLen = 8 / BitBytes;
+  MVT CatTy = MVT::getVectorVT(MVT::i1, CatLen);
+
+  SmallVector<SDValue> Cats;
+  for (unsigned i = 0; i != ResLen / CatLen; ++i)
+    Cats.push_back(DAG.getNode(ISD::CONCAT_VECTORS, dl, CatTy,
+                               Inputs.slice(SliceLen * i, SliceLen)));
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Cats);
+}
+
+SDValue HexagonTargetLowering::combineConcatVectorsBeforeLegal(
+    SDValue Op, DAGCombinerInfo &DCI) const {
+  // Only concats of non-predicate vectors are combined here; a concat with
+  // i1 elements is left alone (an empty SDValue is returned).
+  const bool IsPredicate = ty(Op).getVectorElementType() == MVT::i1;
+  if (IsPredicate)
+    return SDValue();
+
+  return combineConcatOfShuffles(Op, DCI.DAG);
+}
 SDValue
 HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
       const {
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-rescale-predicate.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-rescale-predicate.ll
new file mode 100644
index 0000000000000..057c9cdee5b76
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-rescale-predicate.ll
@@ -0,0 +1,14 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+; Check that widening a <2 x i1> predicate to an HVX predicate doesn't crash.
+; CHECK: vror
+
+define <128 x i8> @foo(<128 x i8> %a0, <128 x i8> %a1, <2 x i16> %a2) #0 {
+  %v1 = icmp sge <2 x i16> %a2, zeroinitializer
+  %v2 = shufflevector <2 x i1> %v1, <2 x i1> poison, <128 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  %v3 = select <128 x i1> %v2, <128 x i8> %a0, <128 x i8> %a1
+  ret <128 x i8> %v3
+}
+
+attributes #0 = { "target-cpu"="hexagonv68" "target-features"="+hvxv68,+hvx-length128b" }