[llvm] r276196 - [NVPTX] Enable the load-store vectorizer on nvptx.
Justin Lebar via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 20 15:11:36 PDT 2016
Author: jlebar
Date: Wed Jul 20 17:11:36 2016
New Revision: 276196
URL: http://llvm.org/viewvc/llvm-project?rev=276196&view=rev
Log:
[NVPTX] Enable the load-store vectorizer on nvptx.
Reviewers: tra
Subscribers: jholewinski, arsenm, asbirlea
Differential Revision: https://reviews.llvm.org/D22592
Added:
llvm/trunk/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
Modified:
llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt
llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
Modified: llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt?rev=276196&r1=276195&r2=276196&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt (original)
+++ llvm/trunk/lib/Target/NVPTX/LLVMBuild.txt Wed Jul 20 17:11:36 2016
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = NVPTXCodeGen
parent = NVPTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils
+required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils Vectorize
add_to_library_groups = NVPTX
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp?rev=276196&r1=276195&r2=276196&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetMachine.cpp Wed Jul 20 17:11:36 2016
@@ -46,6 +46,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
@@ -54,6 +55,13 @@ static cl::opt<bool> UseInferAddressSpac
cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
"NVPTXFavorNonGenericAddrSpaces"));
+// LSV is still relatively new; this switch lets us turn it off in case we
+// encounter (or suspect) a bug.
+static cl::opt<bool>
+ DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
+ cl::desc("Disable load/store vectorizer"),
+ cl::init(false), cl::Hidden);
+
namespace llvm {
void initializeNVVMIntrRangePass(PassRegistry&);
void initializeNVVMReflectPass(PassRegistry&);
@@ -258,6 +266,8 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();
+ if (!DisableLoadStoreVectorizer)
+ addPass(createLoadStoreVectorizerPass());
addStraightLineScalarOptimizationPasses();
}
Added: llvm/trunk/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/LoadStoreVectorizer.ll?rev=276196&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/LoadStoreVectorizer.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/LoadStoreVectorizer.ll Wed Jul 20 17:11:36 2016
@@ -0,0 +1,17 @@
+; RUN: llc < %s | FileCheck -check-prefix=ENABLED %s
+; RUN: llc -disable-nvptx-load-store-vectorizer < %s | FileCheck -check-prefix=DISABLED %s
+target triple = "nvptx64-nvidia-cuda"
+
+; Check that the load-store vectorizer is enabled by default for nvptx, and
+; that it's disabled by the appropriate flag.
+
+; ENABLED: ld.v2.{{.}}32
+; DISABLED: ld.{{.}}32
+; DISABLED: ld.{{.}}32
+define i32 @f(i32* %p) {
+ %p.1 = getelementptr i32, i32* %p, i32 1
+ %v0 = load i32, i32* %p, align 8
+ %v1 = load i32, i32* %p.1, align 4
+ %sum = add i32 %v0, %v1
+ ret i32 %sum
+}
Modified: llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll?rev=276196&r1=276195&r2=276196&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll (original)
+++ llvm/trunk/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll Wed Jul 20 17:11:36 2016
@@ -45,10 +45,10 @@ define void @sum_of_array(i32 %x, i32 %y
ret void
}
; PTX-LABEL: sum_of_array(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array(
; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
@@ -90,10 +90,10 @@ define void @sum_of_array2(i32 %x, i32 %
ret void
}
; PTX-LABEL: sum_of_array2(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array2(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -140,10 +140,10 @@ define void @sum_of_array3(i32 %x, i32 %
ret void
}
; PTX-LABEL: sum_of_array3(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array3(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -186,10 +186,10 @@ define void @sum_of_array4(i32 %x, i32 %
ret void
}
; PTX-LABEL: sum_of_array4(
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
-; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
+; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array4(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
More information about the llvm-commits
mailing list