[llvm] 0792161 - [Hexagon] Fix operation actions for v128f16
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 8 15:40:16 PST 2022
Author: Krzysztof Parzyszek
Date: 2022-02-08T15:28:37-08:00
New Revision: 0792161c006a6a22f55c6bc85ac2fe257982c119
URL: https://github.com/llvm/llvm-project/commit/0792161c006a6a22f55c6bc85ac2fe257982c119
DIFF: https://github.com/llvm/llvm-project/commit/0792161c006a6a22f55c6bc85ac2fe257982c119.diff
LOG: [Hexagon] Fix operation actions for v128f16
There were more cases of operations that should have been "Custom" for
v128f16, but ended up "Legal" (e.g. load and store).
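As an aside (this IR is not part of the patch): besides plain load/store, the change also marks MLOAD/MSTORE as Custom for vector-pair types such as v128f16. A minimal sketch that would exercise that path is shown below; the function name and argument choices are made up for illustration, the masked load/store intrinsic declarations are the standard ones, and the attribute group is copied from the test file in the diff further down.

; Hypothetical example, not taken from the patch's tests: a masked
; load/store on <128 x half>, which now goes through the Custom
; MLOAD/MSTORE lowering instead of being treated as Legal.
declare <128 x half> @llvm.masked.load.v128f16.p0v128f16(<128 x half>*, i32, <128 x i1>, <128 x half>)
declare void @llvm.masked.store.v128f16.p0v128f16(<128 x half>, <128 x half>*, i32, <128 x i1>)

define void @masked_copy(<128 x half>* %a0, <128 x half>* %a1, <128 x i16> %m) #0 {
  ; Derive the predicate from an integer vector argument rather than
  ; passing <128 x i1> directly.
  %mask = icmp ne <128 x i16> %m, zeroinitializer
  %v = call <128 x half> @llvm.masked.load.v128f16.p0v128f16(<128 x half>* %a0, i32 128, <128 x i1> %mask, <128 x half> undef)
  call void @llvm.masked.store.v128f16.p0v128f16(<128 x half> %v, <128 x half>* %a1, i32 128, <128 x i1> %mask)
  ret void
}

attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }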
Added:
Modified:
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 14b4f7c56c57b..6476e022d086b 100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -24,7 +24,6 @@ static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
-
void
HexagonTargetLowering::initializeHVXLowering() {
if (Subtarget.useHVX64BOps()) {
@@ -79,81 +78,85 @@ HexagonTargetLowering::initializeHVXLowering() {
// Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
// Note: v16i1 -> i16 is handled in type legalization instead of op
// legalization.
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
- setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
- setOperationAction(ISD::BITCAST, MVT::i128, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {
- setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal);
- setOperationAction(ISD::FADD, MVT::v64f16, Legal);
- setOperationAction(ISD::FSUB, MVT::v64f16, Legal);
- setOperationAction(ISD::FMUL, MVT::v64f16, Legal);
- setOperationAction(ISD::FADD, MVT::v32f32, Legal);
- setOperationAction(ISD::FSUB, MVT::v32f32, Legal);
- setOperationAction(ISD::FMUL, MVT::v32f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
-
- // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat
- setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom);
+
+ static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
+ static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
+
+ for (MVT T : FloatV) {
+ setOperationAction(ISD::FADD, T, Legal);
+ setOperationAction(ISD::FSUB, T, Legal);
+ setOperationAction(ISD::FMUL, T, Legal);
+ setOperationAction(ISD::FMINNUM, T, Legal);
+ setOperationAction(ISD::FMAXNUM, T, Legal);
+
+ setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
+
+ setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+ setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
+
+ setOperationAction(ISD::MLOAD, T, Custom);
+ setOperationAction(ISD::MSTORE, T, Custom);
+ // Custom-lower BUILD_VECTOR. The standard (target-independent)
+ // handling of it would convert it to a load, which is not always
+ // the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ }
+
// BUILD_VECTOR with f16 operands cannot be promoted without
// promoting the result, so lower the node to vsplat or constant pool
- setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal);
- setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
+
// Vector shuffle is always promoted to ByteV and a bitcast to f16 is
// generated.
setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
- setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
-
- // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
- // independent) handling of it would convert it to a load, which is
- // not always the optimal choice.
- setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom);
- // Make concat-vectors custom to handle concats of more than 2 vectors.
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom);
-
- setOperationAction(ISD::LOAD, MVT::v64f32, Custom);
- setOperationAction(ISD::STORE, MVT::v64f32, Custom);
- setOperationAction(ISD::FADD, MVT::v64f32, Custom);
- setOperationAction(ISD::FSUB, MVT::v64f32, Custom);
- setOperationAction(ISD::FMUL, MVT::v64f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom);
- setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v64f32, Custom);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
+
+ for (MVT P : FloatW) {
+ setOperationAction(ISD::LOAD, P, Custom);
+ setOperationAction(ISD::STORE, P, Custom);
+ setOperationAction(ISD::FADD, P, Custom);
+ setOperationAction(ISD::FSUB, P, Custom);
+ setOperationAction(ISD::FMUL, P, Custom);
+ setOperationAction(ISD::FMINNUM, P, Custom);
+ setOperationAction(ISD::FMAXNUM, P, Custom);
+ setOperationAction(ISD::VSELECT, P, Custom);
+
+ // Custom-lower BUILD_VECTOR. The standard (target-independent)
+ // handling of it would convert it to a load, which is not always
+ // the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, P, Custom);
+ // Make concat-vectors custom to handle concats of more than 2 vectors.
+ setOperationAction(ISD::CONCAT_VECTORS, P, Custom);
+
+ setOperationAction(ISD::MLOAD, P, Custom);
+ setOperationAction(ISD::MSTORE, P, Custom);
+ }
if (Subtarget.useHVXQFloatOps()) {
setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
} else if (Subtarget.useHVXIEEEFPOps()) {
setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
}
-
- setOperationAction(ISD::MLOAD, MVT::v32f32, Custom);
- setOperationAction(ISD::MSTORE, MVT::v32f32, Custom);
- setOperationAction(ISD::MLOAD, MVT::v64f16, Custom);
- setOperationAction(ISD::MSTORE, MVT::v64f16, Custom);
- setOperationAction(ISD::MLOAD, MVT::v64f32, Custom);
- setOperationAction(ISD::MSTORE, MVT::v64f32, Custom);
}
for (MVT T : LegalV) {
@@ -1411,6 +1414,17 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
for (unsigned i = 0; i != Size; ++i)
Ops.push_back(Op.getOperand(i));
+ // First, split the BUILD_VECTOR for vector pairs. We could generate
+ // some pairs directly (via splat), but splats should be generated
+ // by the combiner prior to getting here.
+ if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
+ ArrayRef<SDValue> A(Ops);
+ MVT SingleTy = typeSplit(VecTy).first;
+ SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
+ SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
+ }
+
if (VecTy.getVectorElementType() == MVT::i1)
return buildHvxVectorPred(Ops, dl, VecTy, DAG);
@@ -1427,14 +1441,6 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
}
- if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
- ArrayRef<SDValue> A(Ops);
- MVT SingleTy = typeSplit(VecTy).first;
- SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
- SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
- }
-
return buildHvxVectorReg(Ops, dl, VecTy, DAG);
}
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll
index 3b01f971bad2f..25eccd3d2b648 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-load-store-basic.ll
@@ -161,4 +161,225 @@ define void @f9(<32 x float>* %a0, <32 x float>* %a1) #0 {
ret void
}
+define void @f10(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
+; CHECK-LABEL: f10:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmem(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1.cur = vmem(r0+#3)
+; CHECK-NEXT: vmem(r1+#5) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r1+#4) = v0
+; CHECK-NEXT: }
+ %v0 = getelementptr <256 x i8>, <256 x i8>* %a0, i32 1
+ %v1 = load <256 x i8>, <256 x i8>* %v0, align 128
+ %v2 = getelementptr <256 x i8>, <256 x i8>* %a1, i32 2
+ store <256 x i8> %v1, <256 x i8>* %v2, align 128
+ ret void
+}
+
+define void @f11(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
+; CHECK-LABEL: f11:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmem(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1.cur = vmem(r0+#3)
+; CHECK-NEXT: vmem(r1+#5) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r1+#4) = v0
+; CHECK-NEXT: }
+ %v0 = getelementptr <128 x i16>, <128 x i16>* %a0, i32 1
+ %v1 = load <128 x i16>, <128 x i16>* %v0, align 128
+ %v2 = getelementptr <128 x i16>, <128 x i16>* %a1, i32 2
+ store <128 x i16> %v1, <128 x i16>* %v2, align 128
+ ret void
+}
+
+define void @f12(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
+; CHECK-LABEL: f12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmem(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1.cur = vmem(r0+#3)
+; CHECK-NEXT: vmem(r1+#5) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r1+#4) = v0
+; CHECK-NEXT: }
+ %v0 = getelementptr <64 x i32>, <64 x i32>* %a0, i32 1
+ %v1 = load <64 x i32>, <64 x i32>* %v0, align 128
+ %v2 = getelementptr <64 x i32>, <64 x i32>* %a1, i32 2
+ store <64 x i32> %v1, <64 x i32>* %v2, align 128
+ ret void
+}
+
+define void @f13(<128 x half>* %a0, <128 x half>* %a1) #0 {
+; CHECK-LABEL: f13:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmem(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1.cur = vmem(r0+#3)
+; CHECK-NEXT: vmem(r1+#5) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r1+#4) = v0
+; CHECK-NEXT: }
+ %v0 = getelementptr <128 x half>, <128 x half>* %a0, i32 1
+ %v1 = load <128 x half>, <128 x half>* %v0, align 128
+ %v2 = getelementptr <128 x half>, <128 x half>* %a1, i32 2
+ store <128 x half> %v1, <128 x half>* %v2, align 128
+ ret void
+}
+
+define void @f14(<64 x float>* %a0, <64 x float>* %a1) #0 {
+; CHECK-LABEL: f14:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmem(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1.cur = vmem(r0+#3)
+; CHECK-NEXT: vmem(r1+#5) = v1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmem(r1+#4) = v0
+; CHECK-NEXT: }
+ %v0 = getelementptr <64 x float>, <64 x float>* %a0, i32 1
+ %v1 = load <64 x float>, <64 x float>* %v0, align 128
+ %v2 = getelementptr <64 x float>, <64 x float>* %a1, i32 2
+ store <64 x float> %v1, <64 x float>* %v2, align 128
+ ret void
+}
+
+define void @f15(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
+; CHECK-LABEL: f15:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmemu(r0+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1 = vmemu(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmemu(r1+#5) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmemu(r1+#4) = v1
+; CHECK-NEXT: }
+ %v0 = getelementptr <256 x i8>, <256 x i8>* %a0, i32 1
+ %v1 = load <256 x i8>, <256 x i8>* %v0, align 1
+ %v2 = getelementptr <256 x i8>, <256 x i8>* %a1, i32 2
+ store <256 x i8> %v1, <256 x i8>* %v2, align 1
+ ret void
+}
+
+define void @f16(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
+; CHECK-LABEL: f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmemu(r0+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1 = vmemu(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmemu(r1+#5) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmemu(r1+#4) = v1
+; CHECK-NEXT: }
+ %v0 = getelementptr <128 x i16>, <128 x i16>* %a0, i32 1
+ %v1 = load <128 x i16>, <128 x i16>* %v0, align 1
+ %v2 = getelementptr <128 x i16>, <128 x i16>* %a1, i32 2
+ store <128 x i16> %v1, <128 x i16>* %v2, align 1
+ ret void
+}
+
+define void @f17(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
+; CHECK-LABEL: f17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmemu(r0+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1 = vmemu(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmemu(r1+#5) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmemu(r1+#4) = v1
+; CHECK-NEXT: }
+ %v0 = getelementptr <64 x i32>, <64 x i32>* %a0, i32 1
+ %v1 = load <64 x i32>, <64 x i32>* %v0, align 1
+ %v2 = getelementptr <64 x i32>, <64 x i32>* %a1, i32 2
+ store <64 x i32> %v1, <64 x i32>* %v2, align 1
+ ret void
+}
+
+define void @f18(<128 x half>* %a0, <128 x half>* %a1) #0 {
+; CHECK-LABEL: f18:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmemu(r0+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1 = vmemu(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmemu(r1+#5) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmemu(r1+#4) = v1
+; CHECK-NEXT: }
+ %v0 = getelementptr <128 x half>, <128 x half>* %a0, i32 1
+ %v1 = load <128 x half>, <128 x half>* %v0, align 1
+ %v2 = getelementptr <128 x half>, <128 x half>* %a1, i32 2
+ store <128 x half> %v1, <128 x half>* %v2, align 1
+ ret void
+}
+
+define void @f19(<64 x float>* %a0, <64 x float>* %a1) #0 {
+; CHECK-LABEL: f19:
+; CHECK: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: v0 = vmemu(r0+#3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v1 = vmemu(r0+#2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: vmemu(r1+#5) = v0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: vmemu(r1+#4) = v1
+; CHECK-NEXT: }
+ %v0 = getelementptr <64 x float>, <64 x float>* %a0, i32 1
+ %v1 = load <64 x float>, <64 x float>* %v0, align 1
+ %v2 = getelementptr <64 x float>, <64 x float>* %a1, i32 2
+ store <64 x float> %v1, <64 x float>* %v2, align 1
+ ret void
+}
+
+
attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
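The test file's RUN line lies outside the hunks shown in this diff; as an assumption (not copied from the file), autohvx tests of this kind are typically driven by a line of the form:

; RUN: llc -march=hexagon < %s | FileCheck %s

with the CPU and HVX features supplied by the #0 attribute group above.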