r321818 - [OPENMP] Fix casting in NVPTX support library.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 4 12:18:56 PST 2018
Author: abataev
Date: Thu Jan 4 12:18:55 2018
New Revision: 321818
URL: http://llvm.org/viewvc/llvm-project?rev=321818&view=rev
Log:
[OPENMP] Fix casting in NVPTX support library.
If the reduction requires a shuffle in the NVPTX codegen, we may need to
cast the reduced value to an integer type. This casting was implemented
incorrectly and could cause a compiler crash. This patch fixes the problem.
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=321818&r1=321817&r2=321818&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Thu Jan 4 12:18:55 2018
@@ -1059,19 +1059,41 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParal
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
+/// Cast value to the specified type.
+static llvm::Value *
+castValueToType(CodeGenFunction &CGF, llvm::Value *Val, llvm::Type *CastTy,
+ llvm::Optional<bool> IsSigned = llvm::None) {
+ if (Val->getType() == CastTy)
+ return Val;
+ if (Val->getType()->getPrimitiveSizeInBits() > 0 &&
+ CastTy->getPrimitiveSizeInBits() > 0 &&
+ Val->getType()->getPrimitiveSizeInBits() ==
+ CastTy->getPrimitiveSizeInBits())
+ return CGF.Builder.CreateBitCast(Val, CastTy);
+ if (IsSigned.hasValue() && CastTy->isIntegerTy() &&
+ Val->getType()->isIntegerTy())
+ return CGF.Builder.CreateIntCast(Val, CastTy, *IsSigned);
+ Address CastItem = CGF.CreateTempAlloca(
+ CastTy,
+ CharUnits::fromQuantity(
+ CGF.CGM.getDataLayout().getPrefTypeAlignment(Val->getType())));
+ Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
+ CGF.Builder.CreateStore(Val, ValCastItem);
+ return CGF.Builder.CreateLoad(CastItem);
+}
+
/// This function creates calls to one of two shuffle functions to copy
/// variables between lanes in a warp.
static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
- QualType ElemTy,
llvm::Value *Elem,
llvm::Value *Offset) {
auto &CGM = CGF.CGM;
- auto &C = CGM.getContext();
auto &Bld = CGF.Builder;
CGOpenMPRuntimeNVPTX &RT =
*(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime()));
- unsigned Size = CGM.getContext().getTypeSizeInChars(ElemTy).getQuantity();
+ unsigned Size = CGM.getDataLayout().getTypeStoreSize(Elem->getType());
assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction.");
OpenMPRTLFunctionNVPTX ShuffleFn = Size <= 4
@@ -1079,17 +1101,16 @@ static llvm::Value *createRuntimeShuffle
: OMPRTL_NVPTX__kmpc_shuffle_int64;
// Cast all types to 32- or 64-bit values before calling shuffle routines.
- auto CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
- auto *ElemCast = Bld.CreateSExtOrBitCast(Elem, CastTy);
- auto *WarpSize = CGF.EmitScalarConversion(
- getNVPTXWarpSize(CGF), C.getIntTypeForBitwidth(32, /* Signed */ true),
- C.getIntTypeForBitwidth(16, /* Signed */ true), SourceLocation());
+ llvm::Type *CastTy = Size <= 4 ? CGM.Int32Ty : CGM.Int64Ty;
+ llvm::Value *ElemCast = castValueToType(CGF, Elem, CastTy, /*isSigned=*/true);
+ auto *WarpSize =
+ Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
auto *ShuffledVal =
CGF.EmitRuntimeCall(RT.createNVPTXRuntimeFunction(ShuffleFn),
{ElemCast, Offset, WarpSize});
- return Bld.CreateTruncOrBitCast(ShuffledVal, CGF.ConvertTypeForMem(ElemTy));
+ return castValueToType(CGF, ShuffledVal, Elem->getType(), /*isSigned=*/true);
}
namespace {
@@ -1151,10 +1172,9 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Create a temporary to store the element in the destination
// Reduce list.
@@ -1170,32 +1190,26 @@ static void emitReductionListCopy(
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element. The destination
// element has already been created on the thread's stack.
DestElementPtrAddr =
Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
- llvm::Value *DestElementPtr =
- CGF.EmitLoadOfScalar(DestElementPtrAddr, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation());
- Address DestElemAddr =
- Address(DestElementPtr, C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- DestElemAddr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = CGF.EmitLoadOfPointer(
+ DestElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
break;
}
case ThreadToScratchpad: {
// Step 1.1: Get the address for the src element in the Reduce list.
Address SrcElementPtrAddr =
Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
- llvm::Value *SrcElementPtrPtr = CGF.EmitLoadOfScalar(
- SrcElementPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- SrcElementAddr =
- Address(SrcElementPtrPtr, C.getTypeAlignInChars(Private->getType()));
+ SrcElementAddr = CGF.EmitLoadOfPointer(
+ SrcElementPtrAddr,
+ C.getPointerType(Private->getType())->castAs<PointerType>());
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
@@ -1208,11 +1222,8 @@ static void emitReductionListCopy(
Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
- Address ScratchpadPtr =
- Address(ScratchPadElemAbsolutePtrVal,
- C.getTypeAlignInChars(Private->getType()));
- DestElementAddr = Bld.CreateElementBitCast(
- ScratchpadPtr, CGF.ConvertTypeForMem(Private->getType()));
+ DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
+ C.getTypeAlignInChars(Private->getType()));
IncrScratchpadDest = true;
break;
}
@@ -1253,10 +1264,11 @@ static void emitReductionListCopy(
// Now that all active lanes have read the element in the
// Reduce list, shuffle over the value from the remote lane.
- if (ShuffleInElement) {
- Elem = createRuntimeShuffleFunction(CGF, Private->getType(), Elem,
- RemoteLaneOffset);
- }
+ if (ShuffleInElement)
+ Elem = createRuntimeShuffleFunction(CGF, Elem, RemoteLaneOffset);
+
+ DestElementAddr = Bld.CreateElementBitCast(DestElementAddr,
+ SrcElementAddr.getElementType());
// Store the source element value to the dest element address.
CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
Modified: cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp?rev=321818&r1=321817&r2=321818&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp Thu Jan 4 12:18:55 2018
@@ -168,9 +168,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -405,9 +405,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -714,18 +714,18 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
//
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i{{32|64}} 0, i{{32|64}} 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
Modified: cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp?rev=321818&r1=321817&r2=321818&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_reduction_codegen.cpp Thu Jan 4 12:18:55 2018
@@ -168,9 +168,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -249,9 +249,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to double*
// CHECK: store double [[ELT_VAL]], double* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: ret
@@ -298,25 +298,15 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to double*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load double, double* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: store double [[REMOTE_ELT_VAL]], double* [[ELT]], align
// CHECK: br label {{%?}}[[REDUCE_CONT]]
//
// CHECK: [[REDUCE_CONT]]
// CHECK: ret
-
-
-
-
-
-
-
-
-
-
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}_worker()
// CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l33]](
@@ -480,9 +470,9 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -617,9 +607,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to float*
// CHECK: store float [[ELT_VAL]], float* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: ret
@@ -690,24 +680,15 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to float*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load float, float* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: store float [[REMOTE_ELT_VAL]], float* [[ELT]], align
// CHECK: br label {{%?}}[[REDUCE_CONT]]
//
// CHECK: [[REDUCE_CONT]]
// CHECK: ret
-
-
-
-
-
-
-
-
-
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l40}}_worker()
// CHECK: define {{.*}}void [[T3:@__omp_offloading_.+template.+l40]](
@@ -903,18 +884,18 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
//
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
// CHECK: br label {{%?}}[[COPY_CONT:.+]]
//
@@ -1035,9 +1016,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
// CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
@@ -1053,9 +1034,9 @@ int bar(int n){
// CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
// CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
- // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
+ // CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i16*
// CHECK: store i16 [[ELT_VAL]], i16* [[SCRATCHPAD_ELT_PTR]], align
//
// CHECK: ret
@@ -1121,18 +1102,18 @@ int bar(int n){
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i32*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i32, i32* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: store i32 [[REMOTE_ELT_VAL]], i32* [[ELT]], align
//
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[REMOTE_ELT_VOID:%.+]] = load i8*, i8** [[REMOTE_ELT_REF]],
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST:%.+]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
- // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT:%.+]] = bitcast i8* [[REMOTE_ELT_VOID]] to i16*
// CHECK: [[REMOTE_ELT_VAL:%.+]] = load i16, i16* [[REMOTE_ELT]], align
+ // CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: store i16 [[REMOTE_ELT_VAL]], i16* [[ELT]], align
// CHECK: br label {{%?}}[[REDUCE_CONT]]
//
More information about the cfe-commits
mailing list