[llvm-branch-commits] [llvm] 37c65eb - legalize IVE, v2i8, v4i8
Jeffrey Byrnes via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Oct 17 15:30:17 PDT 2022
Author: Jeffrey Byrnes
Date: 2022-10-17T15:29:54-07:00
New Revision: 37c65ebbcc0b7106fba7bb791a36d7ddabc60ece
URL: https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece
DIFF: https://github.com/llvm/llvm-project/commit/37c65ebbcc0b7106fba7bb791a36d7ddabc60ece.diff
LOG: legalize IVE, v2i8, v4i8
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 62ac1fcd95ce..37d907059687 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -577,6 +577,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
+ if (VT.getScalarSizeInBits() == 8) {
+ break;
+ }
+
assert(VT.getVectorElementType().bitsEq(MVT::i32));
unsigned RegClassID =
SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c32f92cd0da0..f25bcdd28d9c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -171,7 +171,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
MVT::v32i32},
Custom);
-
+
+ //setTruncStoreAction(MVT::i8, MVT::i32, Expand);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
@@ -5729,6 +5730,12 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
if (NumElts == 4 && EltSize == 16 && KIdx) {
+ //errs() << "special case for v4i16\n";
+ //errs() << "VecVT, Op1VT, EltVT: ";
+ errs() << VecVT.getEVTString() << " " << InsVal.getValueType().getEVTString() << " ";
+ errs() << EltVT.getEVTString() << "\n";
+
+
SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
@@ -5755,6 +5762,46 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
}
+ if (NumElts == 4 && EltSize == 8 && KIdx) {
+ errs() << "special case for v4i8\n";
+ errs() << "VecVT, Op1VT, EltVT: ";
+ errs() << VecVT.getEVTString() << " " << InsVal.getValueType().getEVTString() << " ";
+ errs() << EltVT.getEVTString() << "\n";
+
+
+ errs() << "First bitcast\n";
+ SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Vec);
+
+ SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec,
+ DAG.getConstant(0, SL, MVT::i32));
+ SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i16, BCVec,
+ DAG.getConstant(1, SL, MVT::i32));
+
+ errs() << "Second bitcast\n";
+ SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, LoHalf);
+ errs() << "Third bitcast\n";
+ SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i8, HiHalf);
+
+ unsigned Idx = KIdx->getZExtValue();
+ bool InsertLo = Idx < 2;
+ SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i8,
+ InsertLo ? LoVec : HiVec,
+ DAG.getNode(ISD::BITCAST, SL, MVT::i8, InsVal),
+ DAG.getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
+
+ errs() << "Fourth bitcast\n";
+ InsHalf = DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsHalf);
+
+ SDValue Concat = InsertLo ?
+ DAG.getBuildVector(MVT::v2i16, SL, { InsHalf, HiHalf }) :
+ DAG.getBuildVector(MVT::v2i16, SL, { LoHalf, InsHalf });
+
+ return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
+ }
+
+
+
+
// Static indexing does not lower to stack access, and hence there is no need
// for special custom lowering to avoid stack access.
if (isa<ConstantSDNode>(Idx))
@@ -5885,11 +5932,12 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
}
+/*
if (ResultVT == MVT::i8) {
SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, Elt);
return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
}
-
+*/
return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b0bf6aca56b5..f9129eaf3828 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2400,6 +2400,8 @@ def : GCNPat <
(S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
>;
+
+
def : GCNPat <
(i1 (UniformUnaryFrag<trunc> i16:$a)),
(S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
@@ -2794,9 +2796,9 @@ def : GCNPat <
(v2i8 (V_LSHLREV_B32_e64 (i8 8), SReg_32:$src1))
>;
+/*
def : GCNPat <
- (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))),
-
+ (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))),
(v4i8
@@ -2833,32 +2835,8 @@ def : GCNPat <
)
>;
-
-/*
-def : GCNPat <
- (v4i8 (build_vector (i8 SReg_32:$src0), (i8 SReg_32:$src1), (i8 SReg_32:$src2), (i8 SReg_32:$src3))),
- (v4i8 (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src3, (i32 24))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src2, (i32 16))), (i32 (V_OR_B32_e64 (i32 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))))))
->;
-*/
-/*
-def : GCNPat <
- (v2i8 (build_vector (i8:$src0), (i8:$src1))),
- (v2i8 (i16 (V_OR_B32_e64 (i16 (S_LSHL_B32 SReg_32:$src1, (i32 8))), SReg_32:$src0)))
->;
-
-
-def : GCNPat <
- (v2i8 (build_vector i8:$src0, (i8 undef))),
- (COPY $src0)
->;
-
-def : GCNPat <
- (v2i8 (DivergentBinFrag<build_vector> (i8 undef), (i8 SReg_32:$src1))),
- (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)
->;
*/
-
foreach Ty = [i16, f16] in {
defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16);
More information about the llvm-branch-commits mailing list