[llvm] e83ad23 - [TTI] Pre-commit cost model tests splat-loads.
Vasileios Porpodas via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 21 14:46:29 PDT 2022
Author: Vasileios Porpodas
Date: 2022-04-21T14:45:51-07:00
New Revision: e83ad23daf130982facc9b6f6ee1bc0600c956d0
URL: https://github.com/llvm/llvm-project/commit/e83ad23daf130982facc9b6f6ee1bc0600c956d0
DIFF: https://github.com/llvm/llvm-project/commit/e83ad23daf130982facc9b6f6ee1bc0600c956d0.diff
LOG: [TTI] Pre-commit cost model tests splat-loads.
Added:
llvm/test/Analysis/CostModel/AArch64/splat-load.ll
llvm/test/Analysis/CostModel/X86/splat-load.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Analysis/CostModel/AArch64/splat-load.ll b/llvm/test/Analysis/CostModel/AArch64/splat-load.ll
new file mode 100644
index 0000000000000..5e21ececdc9da
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/splat-load.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes='print<cost-model>' 2>&1 -disable-output | FileCheck %s
+
+; This test checks the cost of a splat-load shuffle. The expected cost is 0,
+; because the combined load + broadcast is lowered to a `ld1r` instruction.
+;
+; NOTE: The code in this test is a hack. Since TTI cannot currently detect a
+; proper broadcast pattern from a scalar load (like the one that follows),
+; we use a vector load as the shuffle's operand to trigger the pattern.
+;
+; %load = load double, double *%ptr
+; %insert = insertelement <2 x double> poison, double %load, i32 0
+; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
+
+
+define void @splat_load_2xdouble(<2 x double> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xdouble'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x double>, <2 x double> *%ptr
+ %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_2xfloat(<2 x float> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xfloat'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x float>, <2 x float>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x float>, <2 x float> *%ptr
+ %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_4xfloat(<4 x float> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xfloat'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, <4 x float>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <4 x float>, <4 x float> *%ptr
+ %splat_load = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_2xi32(<2 x i32> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xi32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i32>, <2 x i32>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x i32> %load, <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x i32>, <2 x i32> *%ptr
+ %splat_load = shufflevector <2 x i32> %load, <2 x i32> poison, <2 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_4xi32(<4 x i32> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xi32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, <4 x i32>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %splat_load = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_4xi16(<4 x i16> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i16>, <4 x i16>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x i16> %load, <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <4 x i16>, <4 x i16> *%ptr
+ %splat_load = shufflevector <4 x i16> %load, <4 x i16> poison, <4 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_8xi16(<8 x i16> *%ptr) {
+; CHECK-LABEL: 'splat_load_8xi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, <8 x i16>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <8 x i16>, <8 x i16> *%ptr
+ %splat_load = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_8xi8(<8 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_8xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i8>, <8 x i8>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <8 x i8> %load, <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <8 x i8>, <8 x i8> *%ptr
+ %splat_load = shufflevector <8 x i8> %load, <8 x i8> poison, <8 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_16xi8(<16 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_16xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, <16 x i8>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <16 x i8>, <16 x i8> *%ptr
+ %splat_load = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+ ret void
+}
+
+; `<2 x i8>` is not supported by `ld1r`, so the shuffle cost should not be 0.
+define void @splat_load_2xi8(<2 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load <2 x i8>, <2 x i8>* %ptr, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x i8> %load, <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x i8>, <2 x i8> *%ptr
+ %splat_load = shufflevector <2 x i8> %load, <2 x i8> poison, <2 x i32> zeroinitializer
+ ret void
+}
+
+; `<4 x i8>` is not supported by `ld1r`, so the shuffle cost should not be 0.
+define void @splat_load_4xi8(<4 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %load = load <4 x i8>, <4 x i8>* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x i8> %load, <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <4 x i8>, <4 x i8> *%ptr
+ %splat_load = shufflevector <4 x i8> %load, <4 x i8> poison, <4 x i32> zeroinitializer
+ ret void
+}
+
+; `<2 x i16>` is not supported by `ld1r`, so the shuffle cost should not be 0.
+define void @splat_load_2xi16(<2 x i16> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load <2 x i16>, <2 x i16>* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x i16> %load, <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x i16>, <2 x i16> *%ptr
+ %splat_load = shufflevector <2 x i16> %load, <2 x i16> poison, <2 x i32> zeroinitializer
+ ret void
+}
diff --git a/llvm/test/Analysis/CostModel/X86/splat-load.ll b/llvm/test/Analysis/CostModel/X86/splat-load.ll
new file mode 100644
index 0000000000000..0d9fd437ec16f
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/splat-load.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3
+
+; This test checks the cost of a splat-load shuffle. The expected cost is 0,
+; because the combined load + broadcast is lowered to a `movddup` instruction.
+;
+; TODO: AVX `vbroadcast*` seems to support more types than the
+; 2xdouble type of `movddup`:
+; - `vbroadcastss` supports 4xfloat, 8xfloat
+; - `vbroadcastsd` supports 4xdouble
+
+; NOTE: The code in this test is a hack. Since TTI cannot currently detect a
+; proper broadcast pattern from a scalar load (like the one that follows),
+; we use a vector load as the shuffle's operand to trigger the pattern.
+;
+; %load = load double, double *%ptr
+; %insert = insertelement <2 x double> poison, double %load, i32 0
+; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
+
+define void @splat_load_2xdouble(<2 x double> *%ptr) {
+; SSE2-LABEL: 'splat_load_2xdouble'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'splat_load_2xdouble'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x double>, <2 x double> *%ptr
+ %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+ ret void
+}
+
+define void @splat_load_2xfloat(<2 x float> *%ptr) {
+; SSE2-LABEL: 'splat_load_2xfloat'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x float>, <2 x float>* %ptr, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'splat_load_2xfloat'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x float>, <2 x float>* %ptr, align 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+ %load = load <2 x float>, <2 x float> *%ptr
+ %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+ ret void
+}
More information about the llvm-commits mailing list