[llvm] r299589 - [X86 TTI] Implement LSV hook
Keno Fischer via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 5 13:51:38 PDT 2017
Author: kfischer
Date: Wed Apr 5 15:51:38 2017
New Revision: 299589
URL: http://llvm.org/viewvc/llvm-project?rev=299589&view=rev
Log:
[X86 TTI] Implement LSV hook
Summary:
LSV wants to know the maximum size that can be loaded to a vector register.
On X86, this always matches the maximum register width. Implement this
accordingly and add a test to make sure that LSV can vectorize up to the
maximum permissible width on X86.
Reviewers: delena, arsenm
Reviewed By: arsenm
Subscribers: wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D31504
Added:
llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=299589&r1=299588&r2=299589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Apr 5 15:51:38 2017
@@ -78,7 +78,7 @@ unsigned X86TTIImpl::getNumberOfRegister
return 8;
}
-unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasAVX512())
return 512;
@@ -95,6 +95,10 @@ unsigned X86TTIImpl::getRegisterBitWidth
return 32;
}
+unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
+ return getRegisterBitWidth(true);
+}
+
unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
// If the loop will not be vectorized, don't interleave the loop.
// Let regular unroll to unroll the loop, which saves the overflow
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h?rev=299589&r1=299588&r2=299589&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h Wed Apr 5 15:51:38 2017
@@ -51,7 +51,8 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
Added: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/load-width.ll?rev=299589&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/load-width.ll (added)
+++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/load-width.ll Wed Apr 5 15:51:38 2017
@@ -0,0 +1,38 @@
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
+
+define <8 x double> @loadwidth_insert_extract(double* %ptr) {
+ %a = bitcast double* %ptr to <2 x double> *
+ %b = getelementptr <2 x double>, <2 x double>* %a, i32 1
+ %c = getelementptr <2 x double>, <2 x double>* %a, i32 2
+ %d = getelementptr <2 x double>, <2 x double>* %a, i32 3
+; CHECK-HSW: load <4 x double>
+; CHECK-HSW: load <4 x double>
+; CHECK-HSW-NOT: load
+; CHECK-KNL: load <8 x double>
+; CHECK-KNL-NOT: load
+ %la = load <2 x double>, <2 x double> *%a
+ %lb = load <2 x double>, <2 x double> *%b
+ %lc = load <2 x double>, <2 x double> *%c
+ %ld = load <2 x double>, <2 x double> *%d
+ ; Scalarize everything - Explicitly not a shufflevector to test this code
+ ; path in the LSV
+ %v1 = extractelement <2 x double> %la, i32 0
+ %v2 = extractelement <2 x double> %la, i32 1
+ %v3 = extractelement <2 x double> %lb, i32 0
+ %v4 = extractelement <2 x double> %lb, i32 1
+ %v5 = extractelement <2 x double> %lc, i32 0
+ %v6 = extractelement <2 x double> %lc, i32 1
+ %v7 = extractelement <2 x double> %ld, i32 0
+ %v8 = extractelement <2 x double> %ld, i32 1
+ ; Make a vector again
+ %i1 = insertelement <8 x double> undef, double %v1, i32 0
+ %i2 = insertelement <8 x double> %i1, double %v2, i32 1
+ %i3 = insertelement <8 x double> %i2, double %v3, i32 2
+ %i4 = insertelement <8 x double> %i3, double %v4, i32 3
+ %i5 = insertelement <8 x double> %i4, double %v5, i32 4
+ %i6 = insertelement <8 x double> %i5, double %v6, i32 5
+ %i7 = insertelement <8 x double> %i6, double %v7, i32 6
+ %i8 = insertelement <8 x double> %i7, double %v8, i32 7
+ ret <8 x double> %i8
+}
More information about the llvm-commits
mailing list