[llvm] 0f9109c - [NVPTX] Eliminate StoreRetval instructions with undef operand
Andrew Savonichev via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 10 00:40:13 PST 2022
Author: Daniil Kovalev
Date: 2022-02-10T11:39:43+03:00
New Revision: 0f9109cc9d89a0da1b9fa556136286df66505912
URL: https://github.com/llvm/llvm-project/commit/0f9109cc9d89a0da1b9fa556136286df66505912
DIFF: https://github.com/llvm/llvm-project/commit/0f9109cc9d89a0da1b9fa556136286df66505912.diff
LOG: [NVPTX] Eliminate StoreRetval instructions with undef operand
Previously, many StoreRetval instructions with undef operands were
generated on the NVPTX target when a big struct was returned by value.
This resulted in many unneeded st.param.* instructions in the final
assembly. The patch solves the issue by eliminating such instructions in
the NVPTX-specific part of the DAG combiner.
Differential Revision: https://reviews.llvm.org/D118973
Added:
llvm/test/CodeGen/NVPTX/store-retval.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4b59671bf7195..2cda034f047c1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4475,6 +4475,17 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue PerformStoreRetvalCombine(SDNode *N) {
+ // Operands at index 2 and above are the values to be stored.
+ for (std::size_t I = 2, OpsCount = N->ops().size(); I != OpsCount; ++I)
+ if (!N->getOperand(I).isUndef())
+ return SDValue();
+
+ // All stored values are undef. Return operand 0, the previous chain value;
+ // with EntryToken it would become unused and be eliminated later.
+ return N->getOperand(0);
+}
+
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
@@ -4803,6 +4814,10 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformREMCombine(N, DCI, OptLevel);
case ISD::SETCC:
return PerformSETCCCombine(N, DCI);
+ case NVPTXISD::StoreRetval:
+ case NVPTXISD::StoreRetvalV2:
+ case NVPTXISD::StoreRetvalV4:
+ return PerformStoreRetvalCombine(N);
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/NVPTX/store-retval.ll b/llvm/test/CodeGen/NVPTX/store-retval.ll
new file mode 100644
index 0000000000000..8efe126dac6d2
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/store-retval.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s --mtriple=nvptx-unknown-unknown | FileCheck %s
+;
+; This is IR generated with clang using -O3 optimization level
+; and nvptx-unknown-unknown target from the following C code.
+;
+; struct StNoalign { unsigned int field[5]; };
+; struct StAlign8 { _Alignas(8) unsigned int field[5]; };
+; struct StAlign16 { _Alignas(16) unsigned int field[5]; };
+;
+; #define DECLARE_FUNC(StName) \
+; struct StName func_##StName(struct StName in) { \
+; struct StName ret; \
+; ret.field[4] = in.field[0]; \
+; return ret; \
+; } \
+;
+; DECLARE_FUNC(StNoalign)
+; DECLARE_FUNC(StAlign8)
+; DECLARE_FUNC(StAlign16)
+
+%struct.StNoalign = type { [5 x i32] }
+
+define %struct.StNoalign @func_StNoalign(%struct.StNoalign* nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) {
+ ; CHECK-LABEL: .func{{.*}}func_StNoalign
+ ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StNoalign_param_0];
+ ; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+4], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+8], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
+ ; CHECK: st.param.b32 [func_retval0+16], [[R1]];
+ ; CHECK-NEXT: ret;
+ %arrayidx = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 0
+ %1 = load i32, i32* %arrayidx, align 4
+ %.fca.0.4.insert = insertvalue %struct.StNoalign { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison] }, i32 %1, 0, 4
+ ret %struct.StNoalign %.fca.0.4.insert
+}
+
+%struct.StAlign8 = type { [5 x i32], [4 x i8] }
+
+define %struct.StAlign8 @func_StAlign8(%struct.StAlign8* nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) {
+ ; CHECK-LABEL: .func{{.*}}func_StAlign8
+ ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StAlign8_param_0];
+ ; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+4], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+8], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
+ ; CHECK: st.param.b32 [func_retval0+16], [[R1]];
+ ; CHECK-NOT: st.param.v4.b8 [func_retval0+20], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
+ ; CHECK-NEXT: ret;
+ %arrayidx = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 0
+ %1 = load i32, i32* %arrayidx, align 8
+ %.fca.0.4.insert = insertvalue %struct.StAlign8 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [4 x i8] poison }, i32 %1, 0, 4
+ ret %struct.StAlign8 %.fca.0.4.insert
+}
+
+%struct.StAlign16 = type { [5 x i32], [12 x i8] }
+
+define %struct.StAlign16 @func_StAlign16(%struct.StAlign16* nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) {
+ ; CHECK-LABEL: .func{{.*}}func_StAlign16
+ ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [func_StAlign16_param_0];
+ ; CHECK-NOT: st.param.b32 [func_retval0+0], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+4], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+8], %r{{[0-9]+}};
+ ; CHECK-NOT: st.param.b32 [func_retval0+12], %r{{[0-9]+}};
+ ; CHECK: st.param.b32 [func_retval0+16], [[R1]];
+ ; CHECK-NOT: st.param.v4.b8 [func_retval0+20], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
+ ; CHECK-NOT: st.param.v4.b8 [func_retval0+24], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
+ ; CHECK-NOT: st.param.v4.b8 [func_retval0+28], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
+ ; CHECK-NEXT: ret;
+ %arrayidx = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 0
+ %1 = load i32, i32* %arrayidx, align 16
+ %.fca.0.4.insert = insertvalue %struct.StAlign16 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [12 x i8] poison }, i32 %1, 0, 4
+ ret %struct.StAlign16 %.fca.0.4.insert
+}
More information about the llvm-commits
mailing list