[llvm] [Hexagon] Add support for V128i1/V64i1/V32i1 predicate store/load in HVX (PR #180701)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 23:52:39 PST 2026
https://github.com/pkarveti updated https://github.com/llvm/llvm-project/pull/180701
>From 9d1cb77698bed8c8b54774a790af30b3eb6e2bbf Mon Sep 17 00:00:00 2001
From: pavani karveti <quic_pkarveti at quicinc.com>
Date: Tue, 10 Feb 2026 13:35:04 +0530
Subject: [PATCH] [Hexagon] Add support for V128i1/V64i1/V32i1 predicate
store/load in HVX
Change-Id: Icb19a54d731c066b04088d00df4932ea681256fc
---
llvm/lib/Target/Hexagon/HexagonISelLowering.h | 2 +
.../Target/Hexagon/HexagonISelLoweringHVX.cpp | 132 +++++++++++++++++-
.../Hexagon/hvx-predicate-store-load.ll | 89 ++++++++++++
3 files changed, 222 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/Hexagon/hvx-predicate-store-load.ll
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index f882fe03d465a..76070c1f8a896 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -487,6 +487,8 @@ class HexagonTargetLowering : public TargetLowering {
SDValue LowerHvxPred64ToFp(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxLoad(SDValue Op, SelectionDAG &DAG) const;
VectorPair SplitVectorOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 363f9fd69e3b9..b1181dfa13a10 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -128,6 +128,12 @@ HexagonTargetLowering::initializeHVXLowering() {
if (Subtarget.useHVX128BOps()) {
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v32i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v32i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v64i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v64i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v128i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v128i1, Custom);
}
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
@@ -2170,6 +2176,127 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+ SDValue Val = SN->getValue();
+ MVT ValTy = ty(Val);
+
+ // Check if this is a store of an HVX bool vector (predicate)
+ if (!isHvxBoolTy(ValTy))
+ return SDValue();
+
+ unsigned NumBits = ValTy.getVectorNumElements();
+ MachineMemOperand *MMO = SN->getMemOperand();
+
+ // Check alignment requirements based on predicate size
+ unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
+ if (MMO->getBaseAlign().value() % RequiredAlign != 0)
+ return SDValue();
+
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen / 4);
+
+ // Compress the predicate into a vector register
+ SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
+
+ // Extract words from the compressed vector
+ SmallVector<SDValue, 4> Words;
+ for (unsigned i = 0; i != NumBits / 32; ++i) {
+ SDValue W = extractHvxElementReg(VQ, DAG.getConstant(i, dl, MVT::i32), dl,
+ MVT::i32, DAG);
+ Words.push_back(W);
+ }
+
+ SDValue Chain = SN->getChain();
+ SDValue BasePtr = SN->getBasePtr();
+ MachinePointerInfo PtrInfo = MMO->getPointerInfo();
+
+ if (NumBits == 32)
+ return DAG.getStore(Chain, dl, Words[0], BasePtr, PtrInfo,
+ MMO->getBaseAlign());
+
+ if (NumBits == 64) {
+ SDValue W64 = getCombine(Words[1], Words[0], dl, MVT::i64, DAG);
+ return DAG.getStore(Chain, dl, W64, BasePtr, PtrInfo, MMO->getBaseAlign());
+ }
+
+ if (NumBits == 128) {
+ SDValue Lo64 = getCombine(Words[1], Words[0], dl, MVT::i64, DAG);
+ SDValue Hi64 = getCombine(Words[3], Words[2], dl, MVT::i64, DAG);
+
+ Chain =
+ DAG.getStore(Chain, dl, Lo64, BasePtr, PtrInfo, MMO->getBaseAlign());
+
+ SDValue Offset8 = DAG.getConstant(8, dl, MVT::i32);
+ SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, Offset8);
+ return DAG.getStore(Chain, dl, Hi64, Ptr8, PtrInfo.getWithOffset(8),
+ Align(8));
+ }
+
+ return SDValue();
+}
+
+SDValue HexagonTargetLowering::LowerHvxLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ MVT ResTy = ty(Op);
+
+ // Check if this is a load of an HVX bool vector (predicate)
+ if (!isHvxBoolTy(ResTy))
+ return SDValue();
+
+ unsigned NumBits = ResTy.getVectorNumElements();
+ MachineMemOperand *MMO = LN->getMemOperand();
+
+ unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
+ if (MMO->getBaseAlign().value() % RequiredAlign != 0)
+ return SDValue();
+
+ SDValue Chain = LN->getChain();
+ SDValue BasePtr = LN->getBasePtr();
+ MachinePointerInfo PtrInfo = MMO->getPointerInfo();
+
+ if (NumBits == 32) {
+ SDValue W32 =
+ DAG.getLoad(MVT::i32, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
+ SDValue Pred = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, W32);
+ SDValue Ops[] = {Pred, W32.getValue(1)};
+ return DAG.getMergeValues(Ops, dl);
+ }
+
+ if (NumBits == 64) {
+ SDValue W64 =
+ DAG.getLoad(MVT::i64, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
+ SDValue Pred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, W64);
+ SDValue Ops[] = {Pred, W64.getValue(1)};
+ return DAG.getMergeValues(Ops, dl);
+ }
+
+ if (NumBits == 128) {
+ SDValue Lo64 =
+ DAG.getLoad(MVT::i64, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
+ Chain = Lo64.getValue(1);
+
+ SDValue Offset8 = DAG.getConstant(8, dl, MVT::i32);
+ SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, Offset8);
+ SDValue Hi64 = DAG.getLoad(MVT::i64, dl, Chain, Ptr8,
+ PtrInfo.getWithOffset(8), Align(8));
+
+ SDValue LoPred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, Lo64);
+ SDValue HiPred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, Hi64);
+ SDValue Pred =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v128i1, LoPred, HiPred);
+
+ SDValue Ops[] = {Pred, Hi64.getValue(1)};
+ return DAG.getMergeValues(Ops, dl);
+ }
+
+ return SDValue();
+}
+
SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
// Sign- and zero-extends are legal.
@@ -3509,6 +3636,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Opc) {
default:
break;
+ // clang-format off
case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
@@ -3538,7 +3666,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MLOAD:
case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
// Unaligned loads will be handled by the default lowering.
- case ISD::LOAD: return SDValue();
+ case ISD::LOAD: return LowerHvxLoad(Op, DAG);
+ case ISD::STORE: return LowerHvxStore(Op, DAG);
case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
@@ -3549,6 +3678,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case HexagonISD::SMUL_LOHI:
case HexagonISD::UMUL_LOHI:
case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
+ // clang-format on
}
#ifndef NDEBUG
Op.dumpr(&DAG);
diff --git a/llvm/test/CodeGen/Hexagon/hvx-predicate-store-load.ll b/llvm/test/CodeGen/Hexagon/hvx-predicate-store-load.ll
new file mode 100644
index 0000000000000..0960f37830ded
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-predicate-store-load.ll
@@ -0,0 +1,89 @@
+; RUN: llc -mtriple=hexagon -mattr=+hvxv75,+hvx-length128b < %s | FileCheck %s
+
+; Test v32i1 store operation
+; The custom lowering requires at least 4-byte alignment for v32i1 and
+; compresses the predicate into a single 32-bit word stored with memw.
+; CHECK-LABEL: test_v32i1_store:
+; CHECK: memw(r{{[0-9]+}}+#0) = r{{[0-9]+}}
+define void @test_v32i1_store(i32 %idx) {
+entry:
+ ; Over-allocate and round the pointer up to a 64-byte boundary so the
+ ; `align 64` annotation on the store below is accurate.
+ %ptr = tail call ptr @malloc(i64 1088)
+ %aligned_ptr_int = ptrtoint ptr %ptr to i32
+ %aligned_int = add i32 %aligned_ptr_int, 63
+ %aligned_int_masked = and i32 %aligned_int, -64
+ %aligned_ptr = inttoptr i32 %aligned_int_masked to ptr
+ ; Build a predicate that depends on %idx (so it cannot be constant-folded),
+ ; then mask it so the stored value has both set and clear lanes.
+ %shifted = shl i32 %idx, 3
+ %cond = icmp slt i32 %shifted, 100
+ %pred_elem = insertelement <32 x i1> poison, i1 %cond, i64 0
+ %pred_broadcast = shufflevector <32 x i1> %pred_elem, <32 x i1> poison, <32 x i32> zeroinitializer
+ %mask = and <32 x i1> %pred_broadcast, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ store <32 x i1> %mask, ptr %aligned_ptr, align 64
+ ret void
+}
+
+; Test v32i1 load operation
+; A v32i1 predicate round-trips through a single 32-bit memw load; the
+; `align 64` satisfies the lowering's 4-byte minimum for v32i1.
+; CHECK-LABEL: test_v32i1_load:
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
+define <32 x i1> @test_v32i1_load(ptr %ptr) {
+entry:
+ %pred = load <32 x i1>, ptr %ptr, align 64
+ ret <32 x i1> %pred
+}
+
+; Test v64i1 store operation
+; The custom lowering requires at least 8-byte alignment for v64i1; the
+; predicate is compressed into one 64-bit pair stored with memd.
+; CHECK-LABEL: test_v64i1_store:
+; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}}
+define void @test_v64i1_store(i32 %idx) {
+entry:
+ ; Over-allocate and round the pointer up to a 64-byte boundary so the
+ ; `align 64` annotation on the store below is accurate.
+ %ptr = tail call ptr @malloc(i64 1088)
+ %aligned_ptr_int = ptrtoint ptr %ptr to i32
+ %aligned_int = add i32 %aligned_ptr_int, 63
+ %aligned_int_masked = and i32 %aligned_int, -64
+ %aligned_ptr = inttoptr i32 %aligned_int_masked to ptr
+ ; Build a predicate that depends on %idx (so it cannot be constant-folded),
+ ; then mask it so the stored value has both set and clear lanes.
+ %shifted = shl i32 %idx, 3
+ %cond = icmp slt i32 %shifted, 100
+ %pred_elem = insertelement <64 x i1> poison, i1 %cond, i64 0
+ %pred_broadcast = shufflevector <64 x i1> %pred_elem, <64 x i1> poison, <64 x i32> zeroinitializer
+ %mask = and <64 x i1> %pred_broadcast, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ store <64 x i1> %mask, ptr %aligned_ptr, align 64
+ ret void
+}
+
+; Test v64i1 load operation
+; A v64i1 predicate round-trips through a single 64-bit memd load; the
+; `align 64` satisfies the lowering's 8-byte minimum for v64i1.
+; CHECK-LABEL: test_v64i1_load:
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0)
+define <64 x i1> @test_v64i1_load(ptr %ptr) {
+entry:
+ %pred = load <64 x i1>, ptr %ptr, align 64
+ ret <64 x i1> %pred
+}
+
+; Test v128i1 store operation
+; A v128i1 predicate is split by the custom lowering into two 64-bit memd
+; stores at offsets 0 and 8; `align 64` satisfies the 8-byte minimum.
+; CHECK-LABEL: test_v128i1_store:
+; CHECK: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}}
+; CHECK: memd(r{{[0-9]+}}+#{{[0-9]+}}) = r{{[0-9]+}}:{{[0-9]+}}
+define void @test_v128i1_store(i32 %idx) {
+entry:
+ ; Over-allocate and round the pointer up to a 64-byte boundary so the
+ ; `align 64` annotation on the store below is accurate.
+ %ptr = tail call ptr @malloc(i64 1088)
+ %aligned_ptr_int = ptrtoint ptr %ptr to i32
+ %aligned_int = add i32 %aligned_ptr_int, 63
+ %aligned_int_masked = and i32 %aligned_int, -64
+ %aligned_ptr = inttoptr i32 %aligned_int_masked to ptr
+ ; Build a predicate that depends on %idx (so it cannot be constant-folded),
+ ; then mask it so the stored value has both set and clear lanes.
+ %shifted = shl i32 %idx, 3
+ %cond = icmp slt i32 %shifted, 100
+ %pred_elem = insertelement <128 x i1> poison, i1 %cond, i64 0
+ %pred_broadcast = shufflevector <128 x i1> %pred_elem, <128 x i1> poison, <128 x i32> zeroinitializer
+ %mask = and <128 x i1> %pred_broadcast, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+ store <128 x i1> %mask, ptr %aligned_ptr, align 64
+ ret void
+}
+
+; Test v128i1 load operation
+; A v128i1 predicate is reassembled from two 64-bit memd loads at offsets
+; 0 and 8 (order not guaranteed, hence the DAG checks).
+; CHECK-LABEL: test_v128i1_load:
+; CHECK-DAG: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#0)
+; CHECK-DAG: r{{[0-9]+}}:{{[0-9]+}} = memd(r{{[0-9]+}}+#{{[0-9]+}})
+define <128 x i1> @test_v128i1_load(ptr %ptr) {
+entry:
+ %pred = load <128 x i1>, ptr %ptr, align 64
+ ret <128 x i1> %pred
+}
+
+declare ptr @malloc(i64)
More information about the llvm-commits
mailing list