[llvm] a2a89eb - [Hexagon] Fix lowering loads/stores of scalar vNi1
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 17 12:52:45 PST 2022
Author: Krzysztof Parzyszek
Date: 2022-11-17T12:48:01-08:00
New Revision: a2a89eb019e971c02ea4e18ce5e01242b686421f
URL: https://github.com/llvm/llvm-project/commit/a2a89eb019e971c02ea4e18ce5e01242b686421f
DIFF: https://github.com/llvm/llvm-project/commit/a2a89eb019e971c02ea4e18ce5e01242b686421f.diff
LOG: [Hexagon] Fix lowering loads/stores of scalar vNi1
Don't treat them as i1; all predicate bits need to be loaded or stored.
Added:
Modified:
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 45a3b3c6c7029..11a57b5d2faf2 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3040,15 +3040,16 @@ SDValue
HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
MVT Ty = ty(Op);
const SDLoc &dl(Op);
- // Lower loads of scalar predicate vectors (v2i1, v4i1, v8i1) to loads of i1
- // followed by a TYPECAST.
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
- bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
- if (DoCast) {
+ MVT MemTy = LN->getMemoryVT().getSimpleVT();
+ ISD::LoadExtType ET = LN->getExtensionType();
+
+ bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
+ if (LoadPred) {
SDValue NL = DAG.getLoad(
- LN->getAddressingMode(), LN->getExtensionType(), MVT::i1, dl,
- LN->getChain(), LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
- /*MemoryVT*/ MVT::i1, LN->getAlign(), LN->getMemOperand()->getFlags(),
+ LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
+ LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
+ /*MemoryVT*/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
LN->getAAInfo(), LN->getRanges());
LN = cast<LoadSDNode>(NL.getNode());
}
@@ -3060,10 +3061,15 @@ HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
// Call LowerUnalignedLoad for all loads, it recognizes loads that
// don't need extra aligning.
SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
- if (DoCast) {
- SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, Ty, LU);
+ if (LoadPred) {
+ SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
+ if (ET == ISD::SEXTLOAD) {
+ TP = DAG.getSExtOrTrunc(TP, dl, Ty);
+ } else if (ET != ISD::NON_EXTLOAD) {
+ TP = DAG.getZExtOrTrunc(TP, dl, Ty);
+ }
SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
- return DAG.getMergeValues({TC, Ch}, dl);
+ return DAG.getMergeValues({TP, Ch}, dl);
}
return LU;
}
@@ -3075,11 +3081,11 @@ HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
SDValue Val = SN->getValue();
MVT Ty = ty(Val);
- bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
- if (DoCast) {
- SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, Val);
- SDValue NS = DAG.getStore(SN->getChain(), dl, TC, SN->getBasePtr(),
- SN->getMemOperand());
+ if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
+ // Store the exact predicate (all bits).
+ SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
+ SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
+ MVT::i8, SN->getMemOperand());
if (SN->isIndexed()) {
NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
SN->getAddressingMode());
diff --git a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
index 1173624a21f20..c12d5677fdbfa 100644
--- a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
@@ -120,7 +120,10 @@ define void @f4(<8 x i1>* %a0, i64 %a1) #0 {
; CHECK-NEXT: p0 = vcmpb.eq(r3:2,r5:4)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r1 = mux(p0,#0,#1)
+; CHECK-NEXT: p0 = not(p0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = p0
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memb(r0+#0) = r1
@@ -148,10 +151,13 @@ define void @f5(<4 x i1>* %a0, i32 %a1) #0 {
; CHECK-NEXT: p0 = vcmph.eq(r3:2,r5:4)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r1 = mux(p0,#0,#1)
+; CHECK-NEXT: p0 = not(p0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: memb(r0+#0) = r1
+; CHECK-NEXT: r2 = p0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: memb(r0+#0) = r2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
@@ -203,10 +209,10 @@ define void @f6(<2 x i1>* %a0, i16 %a1) #0 {
; CHECK-NEXT: if (!p1) r5 = add(r1,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r1 = setbit(r2,#6)
+; CHECK-NEXT: if (!p1) r6 = add(r3,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: if (!p1) r6 = add(r3,#0)
+; CHECK-NEXT: r1 = setbit(r2,#6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3 = setbit(r4,#4)
@@ -224,13 +230,7 @@ define void @f6(<2 x i1>* %a0, i16 %a1) #0 {
; CHECK-NEXT: r5 |= or(r4,r2)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: p0 = r5
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: r1 = mux(p0,#1,#0)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: memb(r0+#0) = r1
+; CHECK-NEXT: memb(r0+#0) = r5
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
More information about the llvm-commits
mailing list