r329411 - [OPENMP, NVPTX] Fix codegen for the teams reduction.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 6 09:03:36 PDT 2018
Author: abataev
Date: Fri Apr 6 09:03:36 2018
New Revision: 329411
URL: http://llvm.org/viewvc/llvm-project?rev=329411&view=rev
Log:
[OPENMP, NVPTX] Fix codegen for the teams reduction.
Added NUW flags to all the add|mul|sub operations and replaced sdiv with udiv,
since we operate on unsigned values only (addresses converted to integers).
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_target_printf_codegen.c
cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=329411&r1=329410&r2=329411&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Fri Apr 6 09:03:36 2018
@@ -529,8 +529,8 @@ static llvm::Value *getThreadLimit(CodeG
CGBuilderTy &Bld = CGF.Builder;
return IsInSpmdExecutionMode
? getNVPTXNumThreads(CGF)
- : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
- "thread_limit");
+ : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
}
/// Get the thread id of the OMP master thread.
@@ -545,9 +545,9 @@ static llvm::Value *getMasterThreadID(Co
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
// We assume that the warp size is a power of 2.
- llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
+ llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
- return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+ return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
Bld.CreateNot(Mask), "master_tid");
}
@@ -1714,13 +1714,11 @@ static void emitReductionListCopy(
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
+ Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
@@ -1731,13 +1729,11 @@ static void emitReductionListCopy(
case ScratchpadToThread: {
// Step 1.1: Get the address for the src element in the scratchpad.
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset);
+ Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
@@ -1796,22 +1792,20 @@ static void emitReductionListCopy(
if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
llvm::Value *ScratchpadBasePtr =
IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
- ScratchpadBasePtr = Bld.CreateAdd(
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
ScratchpadBasePtr,
- Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get(
- CGM.SizeTy, ElementSizeInChars)));
+ Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
// Take care of global memory alignment for performance
- ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateSDiv(
+ ScratchpadBasePtr = Bld.CreateNUWSub(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateUDiv(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
- ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateMul(
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateNUWMul(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
Modified: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp?rev=329411&r1=329410&r2=329411&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp Fri Apr 6 09:03:36 2018
@@ -127,7 +127,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+// CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -145,7 +145,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*),
// CHECK: call void @llvm.nvvm.barrier0()
@@ -246,7 +246,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+// CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -264,7 +264,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK-64: [[N:%.+]] = load i32, i32* [[REF_N]],
// CHECK-32: [[N:%.+]] = load i32, i32* [[LOCAL_N]],
Modified: cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp?rev=329411&r1=329410&r2=329411&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_codegen.cpp Fri Apr 6 09:03:36 2018
@@ -68,7 +68,7 @@ int foo(int n) {
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -86,7 +86,7 @@ int foo(int n) {
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: br label {{%?}}[[TERMINATE:.+]]
//
@@ -144,7 +144,7 @@ int foo(int n) {
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -162,7 +162,7 @@ int foo(int n) {
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: load i16, i16* [[AA_CADDR]],
// CHECK: br label {{%?}}[[TERMINATE:.+]]
@@ -245,7 +245,7 @@ int foo(int n) {
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -263,7 +263,7 @@ int foo(int n) {
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// Use captures.
@@ -414,7 +414,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -432,7 +432,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK-64-DAG: load i32, i32* [[REF_A]]
// CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
@@ -503,7 +503,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -521,7 +521,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// Use captures.
// CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0
@@ -587,7 +587,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -605,7 +605,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-64-DAG: load i32, i32* [[REF_A]]
Modified: cfe/trunk/test/OpenMP/nvptx_target_printf_codegen.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_printf_codegen.c?rev=329411&r1=329410&r2=329411&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_printf_codegen.c (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_printf_codegen.c Fri Apr 6 09:03:36 2018
@@ -29,7 +29,7 @@ int CheckSimple() {
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// printf in master-only basic block.
@@ -69,7 +69,7 @@ void CheckNoArgs() {
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// printf in master-only basic block.
@@ -102,7 +102,7 @@ void CheckAllocaIsInEntryBlock() {
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
if (foo) {
Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp?rev=329411&r1=329410&r2=329411&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp Fri Apr 6 09:03:36 2018
@@ -93,7 +93,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -111,7 +111,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-NOT: kmpc_fork_teams
@@ -179,7 +179,7 @@ int bar(int n){
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
@@ -197,7 +197,7 @@ int bar(int n){
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-NOT: kmpc_fork_teams
Modified: cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp?rev=329411&r1=329410&r2=329411&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Fri Apr 6 09:03:36 2018
@@ -246,8 +246,8 @@ int bar(int n){
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 8, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align
@@ -272,8 +272,8 @@ int bar(int n){
// CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align
// CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]]
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 8, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
@@ -588,24 +588,24 @@ int bar(int n){
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 1, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 1, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align
// CHECK: store i8 [[ELT_VAL]], i8* [[SCRATCHPAD_ELT_PTR]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 1
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align
@@ -631,8 +631,8 @@ int bar(int n){
// CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align
// CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]]
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 1, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 1, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
@@ -640,15 +640,15 @@ int bar(int n){
// CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[REMOTE_ELT1]], align
// CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 1
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
@@ -1013,26 +1013,26 @@ int bar(int n){
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
// CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
// CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
@@ -1058,8 +1058,8 @@ int bar(int n){
// CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align
// CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]]
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
@@ -1069,15 +1069,15 @@ int bar(int n){
// CHECK: [[REMOTE_ELT1_PTR:%.+]] = bitcast i32* [[REMOTE_ELT1]] to i8*
// CHECK: store i8* [[REMOTE_ELT1_PTR]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
More information about the cfe-commits mailing list