[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)

Mariya Podchishchaeva via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 12 00:37:21 PDT 2024


https://github.com/Fznamznon updated https://github.com/llvm/llvm-project/pull/91364

>From bbc1ff959c41f5fc710623d23250ebaebcfeb9cb Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 7 May 2024 09:57:20 -0700
Subject: [PATCH 01/21] [clang] Lower long _BitInt(129+) to a different type in
 LLVM IR

Currently, for i128:128 targets, either __int128 or a correct _BitInt(129+)
implementation is possible with lowering to iN, but not both. Since we now
have a correct implementation of __int128, this patch attempts to fix codegen
issues by lowering _BitInt(129+) types to an array of i8 for "memory",
similar to how bools are handled now.

Fixes https://github.com/llvm/llvm-project/issues/85139
Fixes https://github.com/llvm/llvm-project/issues/83419
---
 clang/lib/CodeGen/CGExpr.cpp         |  8 +++
 clang/lib/CodeGen/CGExprConstant.cpp | 12 ++++
 clang/lib/CodeGen/CGExprScalar.cpp   |  7 ++
 clang/lib/CodeGen/CodeGenTypes.cpp   |  6 ++
 clang/test/CodeGen/ext-int-cc.c      |  2 +-
 clang/test/CodeGen/ext-int.c         | 97 ++++++++++++++++++++++++++--
 6 files changed, 127 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d96c7bb1e5682..7e631e469a885 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
     return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
   }
 
+  if (const auto *BIT = Ty->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128) {
+      // Long _BitInt has array of bytes as in-memory type.
+      llvm::Type *NewTy = ConvertType(Ty);
+      Addr = Addr.withElementType(NewTy);
+    }
+  }
+
   llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
   if (isNontemporal) {
     llvm::MDNode *Node = llvm::MDNode::get(
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 94962091116af..98ab1e23d1283 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1774,6 +1774,18 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
     return Res;
   }
 
+  if (const auto *BIT = destType->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128) {
+      // Long _BitInt has array of bytes as in-memory type.
+      ConstantAggregateBuilder Builder(CGM);
+      llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
+      auto *CI = cast<llvm::ConstantInt>(C);
+      llvm::APInt Value = CI->getValue();
+      Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
+      return Builder.build(DesiredTy, /*AllowOversized*/ false);
+    }
+  }
+
   return C;
 }
 
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index d84531959b50b..717d47d20dea1 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -5348,6 +5348,13 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
     return llvm::UndefValue::get(ArgTy);
   }
 
+  if (const auto *BIT = Ty->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128) {
+      // Long _BitInt has array of bytes as in-memory type.
+      ArgPtr = ArgPtr.withElementType(ArgTy);
+    }
+  }
+
   // FIXME Volatility.
   llvm::Value *Val = Builder.CreateLoad(ArgPtr);
 
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index e8d75eda029e6..55c618677ddb4 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -114,6 +114,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
     return llvm::IntegerType::get(getLLVMContext(),
                                   (unsigned)Context.getTypeSize(T));
 
+  if (const auto *BIT = T->getAs<BitIntType>()) {
+    if (BIT->getNumBits() > 128)
+      R = llvm::ArrayType::get(CGM.Int8Ty,
+                               (unsigned)Context.getTypeSize(T) / 8);
+  }
+
   // Else, don't map it.
   return R;
 }
diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c
index 001e866d34b45..83f20dcb0667b 100644
--- a/clang/test/CodeGen/ext-int-cc.c
+++ b/clang/test/CodeGen/ext-int-cc.c
@@ -131,7 +131,7 @@ void ParamPassing3(_BitInt(15) a, _BitInt(31) b) {}
 // are negated. This will give an error when a target does support larger
 // _BitInt widths to alert us to enable the test.
 void ParamPassing4(_BitInt(129) a) {}
-// LIN64: define{{.*}} void @ParamPassing4(ptr byval(i129) align 8 %{{.+}})
+// LIN64: define{{.*}} void @ParamPassing4(ptr byval([24 x i8]) align 8 %{{.+}})
 // WIN64: define dso_local void @ParamPassing4(ptr %{{.+}})
 // LIN32: define{{.*}} void @ParamPassing4(ptr %{{.+}})
 // WIN32: define dso_local void @ParamPassing4(ptr %{{.+}})
diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c
index 4cb399d108f29..a6a632bd985d2 100644
--- a/clang/test/CodeGen/ext-int.c
+++ b/clang/test/CodeGen/ext-int.c
@@ -1,12 +1,19 @@
-// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
-// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
-// RUN: %clang_cc1 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32
-// RUN: %clang_cc1 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
+// RUN: %clang_cc1 -std=c23 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -std=c23 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -std=c23 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32
+// RUN: %clang_cc1 -std=c23 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
+
+// CHECK64: %struct.S1 = type { i17, [4 x i8], [24 x i8] }
+// CHECK64: %struct.S2 = type { [40 x i8], i32, [4 x i8] }
 
 //GH62207
 unsigned _BitInt(1) GlobSize1 = 0;
 // CHECK: @GlobSize1 = {{.*}}global i1 false
 
+// CHECK64: @__const.foo.A = private unnamed_addr constant { i17, [4 x i8], <{ i8, [23 x i8] }> } { i17 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8
+// CHECK64: @BigGlob = {{.*}}global <{ i8, i8, [38 x i8] }> <{ i8 -68, i8 2, [38 x i8] zeroinitializer }>, align 8
+// CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8
+
 void GenericTest(_BitInt(3) a, unsigned _BitInt(3) b, _BitInt(4) c) {
   // CHECK: define {{.*}}void @GenericTest
   int which = _Generic(a, _BitInt(3): 1, unsigned _BitInt(3) : 2, _BitInt(4) : 3);
@@ -62,3 +69,85 @@ void Size1ExtIntParam(unsigned _BitInt(1) A) {
   // CHECK: store i1 %[[PARAM_LOAD]], ptr %[[IDX]]
   B[2] = A;
 }
+
+#if __BITINT_MAXWIDTH__ > 128
+struct S1 {
+  _BitInt(17) A;
+  _BitInt(129) B;
+};
+
+int foo(int a) {
+  // CHECK64: %A1 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 0
+  // CHECK64: store i17 1, ptr %A1, align 8
+  // CHECK64: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
+  // CHECK64: %0 = load i32, ptr %a.addr, align 4
+  // CHECK64: %conv = sext i32 %0 to i129
+  // CHECK64: store i129 %conv, ptr %B2, align 8
+  // CHECK64: %B3 = getelementptr inbounds %struct.S1, ptr %A, i32 0, i32 2
+  // CHECK64: %1 = load i129, ptr %B3, align 8
+  // CHECK64: %conv4 = trunc i129 %1 to i32
+  // CHECK64: %B5 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
+  // CHECK64: %2 = load i129, ptr %B5, align 8
+  struct S1 A = {1, 170};
+  struct S1 B = {1, a};
+  return (int)A.B + (int)B.B;
+}
+
+struct S2 {
+  _BitInt(257) A;
+  int B;
+};
+
+_BitInt(257) bar() {
+  // CHECK64: define {{.*}}void @bar(ptr {{.*}} sret([40 x i8]) align 8 %[[RET:.+]])
+  // CHECK64: %A = alloca %struct.S2, align 8
+  // CHECK64: %0 = getelementptr inbounds { <{ i8, [39 x i8] }>, i32, [4 x i8] }, ptr %A, i32 0, i32 0
+  // CHECK64: %1 = getelementptr inbounds <{ i8, [39 x i8] }>, ptr %0, i32 0, i32 0
+  // CHECK64: store i8 1, ptr %1, align 8
+  // CHECK64: %2 = getelementptr inbounds { <{ i8, [39 x i8] }>, i32, [4 x i8] }, ptr %A, i32 0, i32 1
+  // CHECK64: store i32 10000, ptr %2, align 8
+  // CHECK64: %A1 = getelementptr inbounds %struct.S2, ptr %A, i32 0, i32 0
+  // CHECK64: %3 = load i257, ptr %A1, align 8
+  // CHECK64: store i257 %3, ptr %[[RET]], align 8
+  struct S2 A = {1, 10000};
+  return A.A;
+}
+
+void TakesVarargs(int i, ...) {
+  // CHECK64: define{{.*}} void @TakesVarargs(i32
+__builtin_va_list args;
+__builtin_va_start(args, i);
+
+_BitInt(160) A = __builtin_va_arg(args, _BitInt(160));
+  // CHECK64: %[[ARG:.+]] = load i160
+  // CHECK64: store i160 %[[ARG]], ptr %A, align 8
+}
+
+_BitInt(129) *f1(_BitInt(129) *p) {
+  // CHECK64: getelementptr inbounds [24 x i8], {{.*}} i64 1
+  return p + 1;
+}
+
+char *f2(char *p) {
+  // CHECK64: getelementptr inbounds i8, {{.*}} i64 24
+  return p + sizeof(_BitInt(129));
+}
+
+auto BigGlob = (_BitInt(257))700;
+// CHECK64: define {{.*}}void @foobar(ptr {{.*}} sret([40 x i8]) align 8 %[[RET1:.+]])
+_BitInt(257) foobar() {
+  // CHECK64: %A = alloca [40 x i8], align 8
+  // CHECK64: %0 = load i257, ptr @BigGlob, align 8
+  // CHECK64: %add = add nsw i257 %0, 1
+  // CHECK64: store i257 %add, ptr %A, align 8
+  // CHECK64: %1 = load i257, ptr %A, align 8
+  // CHECK64: store i257 %1, ptr %[[RET1]], align 8
+  _BitInt(257) A = BigGlob + 1;
+  return A;
+}
+
+void f() {
+  static _BitInt(130) p = {10000};
+}
+
+#endif

>From fc6f90c703a9a2cdb731c1fc36a918b56025bf30 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Mon, 13 May 2024 08:43:29 -0700
Subject: [PATCH 02/21] Add load and store type, update more places

---
 clang/lib/CodeGen/CGExpr.cpp                  |   61 +-
 clang/lib/CodeGen/CGExprConstant.cpp          |   20 +-
 clang/lib/CodeGen/CGExprScalar.cpp            |   27 +-
 clang/lib/CodeGen/CGStmt.cpp                  |   14 +-
 clang/lib/CodeGen/CodeGenFunction.cpp         |    5 +
 clang/lib/CodeGen/CodeGenFunction.h           |    2 +
 clang/lib/CodeGen/CodeGenTypes.cpp            |   42 +-
 clang/lib/CodeGen/CodeGenTypes.h              |    9 +
 clang/test/CodeGen/arm-abi-vector.c           |    2 +-
 clang/test/CodeGen/ext-int.c                  |   99 +-
 clang/test/CodeGenCXX/ext-int.cpp             |    6 +-
 .../CodeGenHLSL/builtins/dot-builtin.hlsl     |    4 +-
 clang/test/CodeGenHLSL/builtins/dot.hlsl      |    6 +-
 clang/test/Frontend/fixed_point_comparisons.c |    4 +-
 ...istribute_parallel_for_simd_if_codegen.cpp |  328 ++---
 .../parallel_master_taskloop_simd_codegen.cpp | 1298 ++++++++---------
 ...ams_distribute_parallel_for_if_codegen.cpp |  132 +-
 ...istribute_parallel_for_simd_if_codegen.cpp |  752 +++++-----
 .../target_teams_generic_loop_if_codegen.cpp  |  124 +-
 ...ams_distribute_parallel_for_if_codegen.cpp |  124 +-
 ...istribute_parallel_for_simd_if_codegen.cpp |  720 ++++-----
 .../OpenMP/teams_distribute_simd_codegen.cpp  |  316 ++--
 22 files changed, 2101 insertions(+), 1994 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 7e631e469a885..cb127338b16c7 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1989,13 +1989,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
     return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
   }
 
-  if (const auto *BIT = Ty->getAs<BitIntType>()) {
-    if (BIT->getNumBits() > 128) {
-      // Long _BitInt has array of bytes as in-memory type.
-      llvm::Type *NewTy = ConvertType(Ty);
-      Addr = Addr.withElementType(NewTy);
-    }
-  }
+  Addr = Addr.withElementType(
+      CGM.getTypes().convertTypeForLoadStore(Ty, Addr.getElementType()));
 
   llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
   if (isNontemporal) {
@@ -2020,26 +2015,27 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
 }
 
 llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
-  // Bool has a different representation in memory than in registers.
-  if (hasBooleanRepresentation(Ty)) {
-    // This should really always be an i1, but sometimes it's already
-    // an i8, and it's awkward to track those cases down.
-    if (Value->getType()->isIntegerTy(1))
-      return Builder.CreateZExt(Value, ConvertTypeForMem(Ty), "frombool");
-    assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
-           "wrong value rep of bool");
+  if (hasBooleanRepresentation(Ty) ||
+      (Ty->isBitIntType() && Value->getType()->isIntegerTy())) {
+    llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
+    bool Signed = Ty->isSignedIntegerOrEnumerationType();
+    return Builder.CreateIntCast(Value, StoreTy, Signed, "storedv");
+  }
+
+  if (Ty->isExtVectorBoolType()) {
+    llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
+    // Expand to the memory bit width.
+    unsigned MemNumElems = StoreTy->getPrimitiveSizeInBits();
+    // <N x i1> --> <P x i1>.
+    Value = emitBoolVecConversion(Value, MemNumElems, "insertvec");
+    // <P x i1> --> iP.
+    Value = Builder.CreateBitCast(Value, StoreTy);
   }
 
   return Value;
 }
 
 llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
-  // Bool has a different representation in memory than in registers.
-  if (hasBooleanRepresentation(Ty)) {
-    assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
-           "wrong value rep of bool");
-    return Builder.CreateTrunc(Value, Builder.getInt1Ty(), "tobool");
-  }
   if (Ty->isExtVectorBoolType()) {
     const auto *RawIntTy = Value->getType();
     // Bitcast iP --> <P x i1>.
@@ -2052,6 +2048,12 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
     return emitBoolVecConversion(V, ValNumElems, "extractvec");
   }
 
+  if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
+    llvm::Type *ResTy = ConvertType(Ty);
+    bool Signed = Ty->isSignedIntegerOrEnumerationType();
+    return Builder.CreateIntCast(Value, ResTy, Signed, "loadedv");
+  }
+
   return Value;
 }
 
@@ -2104,17 +2106,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
   llvm::Type *SrcTy = Value->getType();
   if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
     auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
-    if (VecTy && ClangVecTy->isExtVectorBoolType()) {
-      auto *MemIntTy = cast<llvm::IntegerType>(Addr.getElementType());
-      // Expand to the memory bit width.
-      unsigned MemNumElems = MemIntTy->getPrimitiveSizeInBits();
-      // <N x i1> --> <P x i1>.
-      Value = emitBoolVecConversion(Value, MemNumElems, "insertvec");
-      // <P x i1> --> iP.
-      Value = Builder.CreateBitCast(Value, MemIntTy);
-    } else if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+    if (!CGM.getCodeGenOpts().PreserveVec3Type) {
       // Handle vec3 special.
-      if (VecTy && cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
+      if (!Ty->isExtVectorBoolType() && VecTy &&
+          cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
         // Our source is a vec3, do a shuffle vector to make it a vec4.
         Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
                                             "extractVec");
@@ -2473,7 +2468,8 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
 void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
                                                      llvm::Value **Result) {
   const CGBitFieldInfo &Info = Dst.getBitFieldInfo();
-  llvm::Type *ResLTy = ConvertTypeForMem(Dst.getType());
+  llvm::Type *ResLTy = CGM.getTypes().convertTypeForLoadStore(
+      Dst.getType(), ConvertType(Dst.getType()));
   Address Ptr = Dst.getBitFieldAddress();
 
   // Get the source value, truncated to the width of the bit-field.
@@ -2541,7 +2537,6 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
         ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr");
       }
     }
-
     ResultVal = Builder.CreateIntCast(ResultVal, ResLTy, Info.IsSigned,
                                       "bf.result.cast");
     *Result = EmitFromMemory(ResultVal, Dst.getType());
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 98ab1e23d1283..79707fddcfc54 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -761,6 +761,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
       if (Field->hasAttr<NoUniqueAddressAttr>())
         AllowOverwrite = true;
     } else {
+      llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
+          Field->getType(), EltInit->getType());
+      EltInit = llvm::ConstantFoldLoadFromConst(
+          EltInit, LoadType, llvm::APInt::getZero(32), CGM.getDataLayout());
       // Otherwise we have a bitfield.
       if (auto *CI = dyn_cast<llvm::ConstantInt>(EltInit)) {
         if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI,
@@ -862,9 +866,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
       if (Field->hasAttr<NoUniqueAddressAttr>())
         AllowOverwrite = true;
     } else {
+      llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
+          Field->getType(), EltInit->getType());
+      llvm::ConstantInt *Init =
+          cast<llvm::ConstantInt>(llvm::ConstantFoldLoadFromConst(
+              EltInit, LoadType, llvm::APInt::getZero(32),
+              CGM.getDataLayout()));
       // Otherwise we have a bitfield.
       if (!AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
-                          cast<llvm::ConstantInt>(EltInit), AllowOverwrite))
+                          Init, AllowOverwrite))
         return false;
     }
   }
@@ -1774,11 +1784,15 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
     return Res;
   }
 
-  if (const auto *BIT = destType->getAs<BitIntType>()) {
-    if (BIT->getNumBits() > 128) {
+  if (destType->isBitIntType()) {
+    if (!CGM.getTypes().LLVMTypeLayoutMatchesAST(destType, C->getType())) {
       // Long _BitInt has array of bytes as in-memory type.
+      // So, split constant into individual bytes.
       ConstantAggregateBuilder Builder(CGM);
       llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
+      // The LLVM type differs from the AST type only for big enough _BitInts.
+      // Such types don't appear in constant expressions involving ptrtoint, so
+      // it is safe to expect a constant int here.
       auto *CI = cast<llvm::ConstantInt>(C);
       llvm::APInt Value = CI->getValue();
       Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 717d47d20dea1..0c49e0f8b78e6 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -436,9 +436,9 @@ class ScalarExprEmitter
 
     if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
       if (E->isGLValue())
-        return CGF.Builder.CreateLoad(Address(
-            Result, CGF.ConvertTypeForMem(E->getType()),
-            CGF.getContext().getTypeAlignInChars(E->getType())));
+        return CGF.Builder.CreateLoad(
+            Address(Result, CGF.convertTypeForLoadStore(E->getType()),
+                    CGF.getContext().getTypeAlignInChars(E->getType())));
       return Result;
     }
     return Visit(E->getSubExpr());
@@ -5348,25 +5348,8 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
     return llvm::UndefValue::get(ArgTy);
   }
 
-  if (const auto *BIT = Ty->getAs<BitIntType>()) {
-    if (BIT->getNumBits() > 128) {
-      // Long _BitInt has array of bytes as in-memory type.
-      ArgPtr = ArgPtr.withElementType(ArgTy);
-    }
-  }
-
-  // FIXME Volatility.
-  llvm::Value *Val = Builder.CreateLoad(ArgPtr);
-
-  // If EmitVAArg promoted the type, we must truncate it.
-  if (ArgTy != Val->getType()) {
-    if (ArgTy->isPointerTy() && !Val->getType()->isPointerTy())
-      Val = Builder.CreateIntToPtr(Val, ArgTy);
-    else
-      Val = Builder.CreateTrunc(Val, ArgTy);
-  }
-
-  return Val;
+  return CGF.EmitLoadOfScalar(ArgPtr, Ty.isVolatileQualified(), Ty,
+                              VE->getExprLoc());
 }
 
 Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 479945e3b4cb5..39b0b5a606b5c 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1504,9 +1504,17 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
     Builder.CreateStore(Result.getScalarVal(), ReturnValue);
   } else {
     switch (getEvaluationKind(RV->getType())) {
-    case TEK_Scalar:
-      Builder.CreateStore(EmitScalarExpr(RV), ReturnValue);
-      break;
+    case TEK_Scalar: {
+      llvm::Value *Ret = EmitScalarExpr(RV);
+      // EmitStoreOfScalar could be used here, but it extends bool which for
+      // some targets is returned as i1 zeroext.
+      if (RV->getType()->isBitIntType()) {
+        QualType Ty = RV->getType();
+        if (!CGM.getTypes().LLVMTypeLayoutMatchesAST(Ty, Ret->getType()))
+          Ret = EmitToMemory(Ret, Ty);
+      }
+      Builder.CreateStore(Ret, ReturnValue);
+    } break;
     case TEK_Complex:
       EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()),
                                 /*isInit*/ true);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 477814140a9e2..01a3aac1e4829 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -233,6 +233,11 @@ llvm::Type *CodeGenFunction::ConvertType(QualType T) {
   return CGM.getTypes().ConvertType(T);
 }
 
+llvm::Type *CodeGenFunction::convertTypeForLoadStore(QualType ASTTy,
+                                                     llvm::Type *LLVMTy) {
+  return CGM.getTypes().convertTypeForLoadStore(ASTTy, LLVMTy);
+}
+
 TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) {
   type = type.getCanonicalType();
   while (true) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e1e687af6a781..72b0bf028032e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2565,6 +2565,8 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   llvm::Type *ConvertTypeForMem(QualType T);
   llvm::Type *ConvertType(QualType T);
+  llvm::Type *convertTypeForLoadStore(QualType ASTTy,
+                                      llvm::Type *LLVMTy = nullptr);
   llvm::Type *ConvertType(const TypeDecl *T) {
     return ConvertType(getContext().getTypeDeclType(T));
   }
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 55c618677ddb4..aa9c7a61d67d3 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -107,6 +107,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
     return llvm::IntegerType::get(FixedVT->getContext(), BytePadded);
   }
 
+  if (T->isBitIntType()) {
+    if (!LLVMTypeLayoutMatchesAST(T, R))
+      return llvm::ArrayType::get(CGM.Int8Ty,
+                                  Context.getTypeSizeInChars(T).getQuantity());
+  }
+
   // If this is a bool type, or a bit-precise integer type in a bitfield
   // representation, map this integer to the target-specified size.
   if ((ForBitField && T->isBitIntType()) ||
@@ -114,16 +120,42 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
     return llvm::IntegerType::get(getLLVMContext(),
                                   (unsigned)Context.getTypeSize(T));
 
-  if (const auto *BIT = T->getAs<BitIntType>()) {
-    if (BIT->getNumBits() > 128)
-      R = llvm::ArrayType::get(CGM.Int8Ty,
-                               (unsigned)Context.getTypeSize(T) / 8);
-  }
 
   // Else, don't map it.
   return R;
 }
 
+bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy,
+                                            llvm::Type *LLVMTy) {
+  CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy);
+  CharUnits LLVMSize =
+      CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy));
+  return ASTSize == LLVMSize;
+}
+
+llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
+                                                  llvm::Type *LLVMTy) {
+  if (!LLVMTy)
+    LLVMTy = ConvertType(T);
+
+  if (!T->isBitIntType() && LLVMTy->isIntegerTy(1))
+    return llvm::IntegerType::get(getLLVMContext(),
+                                  (unsigned)Context.getTypeSize(T));
+
+  if (T->isBitIntType()) {
+    llvm::Type *R = ConvertType(T);
+    if (!LLVMTypeLayoutMatchesAST(T, R))
+      return llvm::Type::getIntNTy(
+          getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
+  }
+
+  if (T->isExtVectorBoolType()) {
+    return ConvertTypeForMem(T);
+  }
+
+  return LLVMTy;
+}
+
 /// isRecordLayoutComplete - Return true if the specified type is already
 /// completely laid out.
 bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const {
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 01c0c673795c0..c9b7a3a36832f 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -128,6 +128,15 @@ class CodeGenTypes {
   /// memory representation is usually i8 or i32, depending on the target.
   llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
 
+  /// Check that the allocation size of the given LLVM type matches the size
+  /// of the given AST type. Note that alignment is not compared.
+  bool LLVMTypeLayoutMatchesAST(QualType ASTTy, llvm::Type *LLVMTy);
+
+  /// For AST types with special memory representation and whose AST type
+  /// doesn't match LLVM type (see LLVMTypeLayoutMatchesAST) returns type that
+  /// ought to be used for load and store operations.
+  llvm::Type *convertTypeForLoadStore(QualType T, llvm::Type *LLVMTy = nullptr);
+
   /// GetFunctionType - Get the LLVM function type for \arg Info.
   llvm::FunctionType *GetFunctionType(const CGFunctionInfo &Info);
 
diff --git a/clang/test/CodeGen/arm-abi-vector.c b/clang/test/CodeGen/arm-abi-vector.c
index 7f0cc4bcb0cd8..c2a8902007980 100644
--- a/clang/test/CodeGen/arm-abi-vector.c
+++ b/clang/test/CodeGen/arm-abi-vector.c
@@ -194,7 +194,7 @@ double varargs_vec_3s(int fixed, ...) {
 // APCS-GNU: [[VAR:%.*]] = alloca <3 x i16>, align 8
 // APCS-GNU: [[AP:%.*]] = load ptr,
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 8
-// APCS-GNU: [[VEC:%.*]] = load <3 x i16>, ptr [[AP]], align 4
+// APCS-GNU: [[VEC:%.*]] = load <4 x i16>, ptr [[AP]], align 4
 // ANDROID: varargs_vec_3s
 // ANDROID: alloca <3 x i16>, align 8
 // ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c
index a6a632bd985d2..223742cc3bc64 100644
--- a/clang/test/CodeGen/ext-int.c
+++ b/clang/test/CodeGen/ext-int.c
@@ -1,10 +1,16 @@
-// RUN: %clang_cc1 -std=c23 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
-// RUN: %clang_cc1 -std=c23 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64
+// RUN: %clang_cc1 -std=c23 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64,LIN64
+// RUN: %clang_cc1 -std=c23 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK64,WIN64
 // RUN: %clang_cc1 -std=c23 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32
 // RUN: %clang_cc1 -std=c23 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
 
 // CHECK64: %struct.S1 = type { i17, [4 x i8], [24 x i8] }
+// WIN32: %struct.S1 = type { i17, [4 x i8], [24 x i8] }
+// LIN32: %struct.S1 = type { i17, [20 x i8] }
 // CHECK64: %struct.S2 = type { [40 x i8], i32, [4 x i8] }
+// WIN32: %struct.S2 = type { [40 x i8], i32, [4 x i8] }
+// LIN32: %struct.S2 = type { [36 x i8], i32 }
+// LIN64: %struct.S3 = type { [17 x i8], [7 x i8] }
+// WIN64: %struct.S3 = type { [24 x i8] }
 
 //GH62207
 unsigned _BitInt(1) GlobSize1 = 0;
@@ -50,7 +56,7 @@ void OffsetOfTest(void) {
   int B = __builtin_offsetof(struct S,B);
   // CHECK64: store i32 8, ptr %{{.+}}
   // LIN32: store i32 4, ptr %{{.+}}
-  // WINCHECK32: store i32 8, ptr %{{.+}}
+  // WIN32: store i32 8, ptr %{{.+}}
   int C = __builtin_offsetof(struct S,C);
   // CHECK64: store i32 24, ptr %{{.+}}
   // LIN32: store i32 20, ptr %{{.+}}
@@ -77,17 +83,29 @@ struct S1 {
 };
 
 int foo(int a) {
-  // CHECK64: %A1 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 0
-  // CHECK64: store i17 1, ptr %A1, align 8
+  // CHECK: %A1 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 0
+  // CHECK: store i17 1, ptr %A1
   // CHECK64: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
-  // CHECK64: %0 = load i32, ptr %a.addr, align 4
-  // CHECK64: %conv = sext i32 %0 to i129
-  // CHECK64: store i129 %conv, ptr %B2, align 8
+  // WIN32: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
+  // LIN32: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 1
+  // CHECK: %0 = load i32, ptr %a.addr, align 4
+  // CHECK: %conv = sext i32 %0 to i129
+  // CHECK64: storedv = sext i129 %conv to i192
+  // WIN32: storedv = sext i129 %conv to i192
+  // LIN32: storedv = sext i129 %conv to i160
+  // CHECK64: store i192 %storedv, ptr %B2, align 8
+  // WIN32: store i192 %storedv, ptr %B2, align 8
+  // LIN32: store i160 %storedv, ptr %B2, align 4
   // CHECK64: %B3 = getelementptr inbounds %struct.S1, ptr %A, i32 0, i32 2
-  // CHECK64: %1 = load i129, ptr %B3, align 8
-  // CHECK64: %conv4 = trunc i129 %1 to i32
-  // CHECK64: %B5 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
-  // CHECK64: %2 = load i129, ptr %B5, align 8
+  // WIN32: %B3 = getelementptr inbounds %struct.S1, ptr %A, i32 0, i32 2
+  // LIN32: %B3 = getelementptr inbounds %struct.S1, ptr %A, i32 0, i32 1
+  // CHECK64: %1 = load i192, ptr %B3, align 8
+  // WIN32: %1 = load i192, ptr %B3, align 8
+  // LIN32: %1 = load i160, ptr %B3, align 4
+  // CHECK64: %loadedv = trunc i192 %1 to i129
+  // WIN32: %loadedv = trunc i192 %1 to i129
+  // LIN32: %loadedv = trunc i160 %1 to i129
+  // CHECK: %conv4 = trunc i129 %loadedv to i32
   struct S1 A = {1, 170};
   struct S1 B = {1, a};
   return (int)A.B + (int)B.B;
@@ -107,8 +125,10 @@ _BitInt(257) bar() {
   // CHECK64: %2 = getelementptr inbounds { <{ i8, [39 x i8] }>, i32, [4 x i8] }, ptr %A, i32 0, i32 1
   // CHECK64: store i32 10000, ptr %2, align 8
   // CHECK64: %A1 = getelementptr inbounds %struct.S2, ptr %A, i32 0, i32 0
-  // CHECK64: %3 = load i257, ptr %A1, align 8
-  // CHECK64: store i257 %3, ptr %[[RET]], align 8
+  // CHECK64: %3 = load i320, ptr %A1, align 8
+  // CHECK64: %loadedv = trunc i320 %3 to i257
+  // CHECK64: %storedv = sext i257 %loadedv to i320
+  // CHECK64: store i320 %storedv, ptr %[[RET]], align 8
   struct S2 A = {1, 10000};
   return A.A;
 }
@@ -119,8 +139,10 @@ __builtin_va_list args;
 __builtin_va_start(args, i);
 
 _BitInt(160) A = __builtin_va_arg(args, _BitInt(160));
-  // CHECK64: %[[ARG:.+]] = load i160
-  // CHECK64: store i160 %[[ARG]], ptr %A, align 8
+  // CHECK64: %[[ARG:.+]] = load i192
+  // CHECK64: %[[TRUNC:.+]] = trunc i192 %[[ARG]] to i160
+  // CHECK64: %[[SEXT:.+]] = sext i160 %[[TRUNC]] to i192
+  // CHECK64: store i192 %[[SEXT]], ptr %A, align 8
 }
 
 _BitInt(129) *f1(_BitInt(129) *p) {
@@ -137,11 +159,15 @@ auto BigGlob = (_BitInt(257))700;
 // CHECK64: define {{.*}}void @foobar(ptr {{.*}} sret([40 x i8]) align 8 %[[RET1:.+]])
 _BitInt(257) foobar() {
   // CHECK64: %A = alloca [40 x i8], align 8
-  // CHECK64: %0 = load i257, ptr @BigGlob, align 8
-  // CHECK64: %add = add nsw i257 %0, 1
-  // CHECK64: store i257 %add, ptr %A, align 8
-  // CHECK64: %1 = load i257, ptr %A, align 8
-  // CHECK64: store i257 %1, ptr %[[RET1]], align 8
+  // CHECK64: %0 = load i320, ptr @BigGlob, align 8
+  // CHECK64: %loadedv = trunc i320 %0 to i257
+  // CHECK64: %add = add nsw i257 %loadedv, 1
+  // CHECK64: %storedv = sext i257 %add to i320
+  // CHECK64: store i320 %storedv, ptr %A, align 8
+  // CHECK64: %1 = load i320, ptr %A, align 8
+  // CHECK64: %loadedv1 = trunc i320 %1 to i257
+  // CHECK64: %storedv2 = sext i257 %loadedv1 to i320
+  // CHECK64: store i320 %storedv2, ptr %[[RET1]], align 8
   _BitInt(257) A = BigGlob + 1;
   return A;
 }
@@ -150,4 +176,35 @@ void f() {
   static _BitInt(130) p = {10000};
 }
 
+struct S3 {
+  _BitInt (136) A : 129;
+};
+
+void bitField() {
+  struct S3 s = {1};
+  struct {
+    _BitInt (136) A : 48;
+    int a;
+  } s1 = {s.A};
+  s1.A = 36;
+  // LIN64: %s = alloca %struct.S3, align 8
+  // LIN64: %s1 = alloca %struct.anon, align 8
+  // LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 @__const.bitField.s, i64 24, i1 false)
+  // LIN64: %bf.load = load i136, ptr %s, align 8
+  // LIN64: %bf.shl = shl i136 %bf.load, 7
+  // LIN64: %bf.ashr = ashr i136 %bf.shl, 7
+  // LIN64: %0 = trunc i136 %bf.ashr to i64
+  // LIN64: %bf.load1 = load i64, ptr %s1, align 8
+  // LIN64: %bf.value = and i64 %0, 281474976710655
+  // LIN64: %bf.clear = and i64 %bf.load1, -281474976710656
+  // LIN64: %bf.set = or i64 %bf.clear, %bf.value
+  // LIN64: store i64 %bf.set, ptr %s1, align 8
+  // LIN64: %a = getelementptr inbounds %struct.anon, ptr %s1, i32 0, i32 1
+  // LIN64: store i32 0, ptr %a, align 8
+  // LIN64: %bf.load2 = load i64, ptr %s1, align 8
+  // LIN64: %bf.clear3 = and i64 %bf.load2, -281474976710656
+  // LIN64: %bf.set4 = or i64 %bf.clear3, 36
+  // LIN64: store i64 %bf.set4, ptr %s1, align 8
+}
+
 #endif
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index a1d17c840ee46..41fb6274d42c0 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -176,8 +176,10 @@ void TakesVarargs(int i, ...) {
   // LIN32: %[[CUR1:.+]] = load ptr, ptr %[[ARGS]]
   // LIN32: %[[NEXT1:.+]] = getelementptr inbounds i8, ptr %[[CUR1]], i32 12
   // LIN32: store ptr %[[NEXT1]], ptr %[[ARGS]]
-  // LIN32: %[[LOADV1:.+]] = load i92, ptr %[[CUR1]]
-  // LIN32: store i92 %[[LOADV1]], ptr
+  // LIN32: %[[LOADV1:.+]] = load i96, ptr %[[CUR1]]
+  // LIN32: %[[TR:.+]] = trunc i96 %[[LOADV1]] to i92
+  // LIN32: %[[SEXT:.+]] = sext i92 %[[TR]] to i96
+  // LIN32: store i96 %[[SEXT]], ptr
 
   // WIN64: %[[CUR1:.+]] = load ptr, ptr %[[ARGS]]
   // WIN64: %[[NEXT1:.+]] = getelementptr inbounds i8, ptr %[[CUR1]], i64 8
diff --git a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
index 9881dabc3a110..b0b95074c972d 100644
--- a/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot-builtin.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
 // CHECK-LABEL: builtin_bool_to_float_type_promotion
-// CHECK: %conv1 = uitofp i1 %tobool to double
+// CHECK: %conv1 = uitofp i1 %loadedv to double
 // CHECK: %dx.dot = fmul double %conv, %conv1
 // CHECK: %conv2 = fptrunc double %dx.dot to float
 // CHECK: ret float %conv2
@@ -10,7 +10,7 @@ float builtin_bool_to_float_type_promotion ( float p0, bool p1 ) {
 }
 
 // CHECK-LABEL: builtin_bool_to_float_arg1_type_promotion
-// CHECK: %conv = uitofp i1 %tobool to double
+// CHECK: %conv = uitofp i1 %loadedv to double
 // CHECK: %conv1 = fpext float %1 to double
 // CHECK: %dx.dot = fmul double %conv, %conv1
 // CHECK: %conv2 = fptrunc double %dx.dot to float
diff --git a/clang/test/CodeGenHLSL/builtins/dot.hlsl b/clang/test/CodeGenHLSL/builtins/dot.hlsl
index 307d71cce3cb6..ca84d2f401e13 100644
--- a/clang/test/CodeGenHLSL/builtins/dot.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/dot.hlsl
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
 // RUN:   --check-prefixes=CHECK,NATIVE_HALF
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
@@ -178,14 +178,14 @@ float test_builtin_dot_float3_int_splat(float3 p0, int p1) {
 // CHECK: ret double %dx.dot
 double test_dot_double(double p0, double p1) { return dot(p0, p1); }
 
-// CHECK: %conv = zext i1 %tobool to i32
+// CHECK: %conv = zext i1 %loadedv to i32
 // CHECK: %dx.dot = mul i32 %conv, %1
 // CHECK: ret i32 %dx.dot
 int test_dot_bool_scalar_arg0_type_promotion(bool p0, int p1) {
   return dot(p0, p1);
 }
 
-// CHECK: %conv = zext i1 %tobool to i32
+// CHECK: %conv = zext i1 %loadedv to i32
 // CHECK: %dx.dot = mul i32 %0, %conv
 // CHECK: ret i32 %dx.dot
 int test_dot_bool_scalar_arg1_type_promotion(int p0, bool p1) {
diff --git a/clang/test/Frontend/fixed_point_comparisons.c b/clang/test/Frontend/fixed_point_comparisons.c
index 8cd2aa2dbc651..59c4405e41c03 100644
--- a/clang/test/Frontend/fixed_point_comparisons.c
+++ b/clang/test/Frontend/fixed_point_comparisons.c
@@ -249,8 +249,8 @@ void TestIntComparisons(void) {
   sa == b;
   // CHECK:      [[A:%[0-9]+]] = load i16, ptr %sa, align 2
   // CHECK-NEXT: [[B:%[0-9]+]] = load i8, ptr %b, align 1
-  // CHECK-NEXT: %tobool = trunc i8 [[B]] to i1
-  // CHECK-NEXT: [[CONV_B:%[a-z0-9]+]] = zext i1 %tobool to i32
+  // CHECK-NEXT: %loadedv = trunc i8 [[B]] to i1
+  // CHECK-NEXT: [[CONV_B:%[a-z0-9]+]] = zext i1 %loadedv to i32
   // CHECK-NEXT: [[RESIZE_A:%[a-z0-9]+]] = sext i16 [[A]] to i39
   // CHECK-NEXT: [[RESIZE_B:%[a-z0-9]+]] = sext i32 [[CONV_B]] to i39
   // CHECK-NEXT: [[UPSCALE_B:%[a-z0-9]+]] = shl i39 [[RESIZE_B]], 7
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
index 4a321b24b8c31..6693473e892d2 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp
@@ -2567,16 +2567,16 @@ int main() {
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
-// CHECK3-NEXT:    [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
+// CHECK3-NEXT:    [[DOTBOUND_ZERO_ADDR16:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK3-NEXT:    store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK3-NEXT:    store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
@@ -2598,29 +2598,29 @@ int main() {
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK3-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK3:       omp.inner.for.cond:
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]]
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]]
-// CHECK3-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK3-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3-NEXT:    [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT:    br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3:       omp.inner.for.body:
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK3-NEXT:    [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
+// CHECK3-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK3-NEXT:    [[STOREDV3:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK3-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]]
-// CHECK3:       omp_if.then6:
+// CHECK3-NEXT:    [[LOADEDV4:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3:       omp_if.then5:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK3:       omp_if.else:
@@ -2639,48 +2639,48 @@ int main() {
 // CHECK3-NEXT:    store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
 // CHECK3:       omp.inner.for.end:
-// CHECK3-NEXT:    br label [[OMP_IF_END22:%.*]]
-// CHECK3:       omp_if.else7:
-// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND8:%.*]]
-// CHECK3:       omp.inner.for.cond8:
+// CHECK3-NEXT:    br label [[OMP_IF_END21:%.*]]
+// CHECK3:       omp_if.else6:
+// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND7:%.*]]
+// CHECK3:       omp.inner.for.cond7:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT:    [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
-// CHECK3-NEXT:    br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]]
-// CHECK3:       omp.inner.for.body10:
+// CHECK3-NEXT:    [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
+// CHECK3-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END20:%.*]]
+// CHECK3:       omp.inner.for.body9:
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK3-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK3-NEXT:    [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL13]], ptr [[DOTCAPTURE_EXPR__CASTED12]], align 1
-// CHECK3-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED12]], align 8
+// CHECK3-NEXT:    [[LOADEDV10:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK3-NEXT:    [[STOREDV12:%.*]] = zext i1 [[LOADEDV10]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV12]], ptr [[DOTCAPTURE_EXPR__CASTED11]], align 1
+// CHECK3-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED11]], align 8
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL14:%.*]] = trunc i8 [[TMP28]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]]
-// CHECK3:       omp_if.then15:
+// CHECK3-NEXT:    [[LOADEDV13:%.*]] = trunc i8 [[TMP28]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV13]], label [[OMP_IF_THEN14:%.*]], label [[OMP_IF_ELSE15:%.*]]
+// CHECK3:       omp_if.then14:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined.1, i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]])
-// CHECK3-NEXT:    br label [[OMP_IF_END18:%.*]]
-// CHECK3:       omp_if.else16:
+// CHECK3-NEXT:    br label [[OMP_IF_END17:%.*]]
+// CHECK3:       omp_if.else15:
 // CHECK3-NEXT:    call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK3-NEXT:    store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined.1(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
+// CHECK3-NEXT:    store i32 0, ptr [[DOTBOUND_ZERO_ADDR16]], align 4
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined.1(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR16]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
 // CHECK3-NEXT:    call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK3-NEXT:    br label [[OMP_IF_END18]]
-// CHECK3:       omp_if.end18:
-// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC19:%.*]]
-// CHECK3:       omp.inner.for.inc19:
+// CHECK3-NEXT:    br label [[OMP_IF_END17]]
+// CHECK3:       omp_if.end17:
+// CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC18:%.*]]
+// CHECK3:       omp.inner.for.inc18:
 // CHECK3-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
-// CHECK3-NEXT:    store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]]
-// CHECK3:       omp.inner.for.end21:
-// CHECK3-NEXT:    br label [[OMP_IF_END22]]
-// CHECK3:       omp_if.end22:
+// CHECK3-NEXT:    [[ADD19:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
+// CHECK3-NEXT:    store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]]
+// CHECK3:       omp.inner.for.end20:
+// CHECK3-NEXT:    br label [[OMP_IF_END21]]
+// CHECK3:       omp_if.end21:
 // CHECK3-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK3:       omp.loop.exit:
 // CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
@@ -2725,8 +2725,8 @@ int main() {
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -2852,8 +2852,8 @@ int main() {
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -3969,60 +3969,60 @@ int main() {
 // CHECK7-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK7-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK7-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK7-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV19]], align 4
 // CHECK7-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK7-NEXT:    [[TOBOOL21:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK7-NEXT:    br i1 [[TOBOOL21]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK7-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK7-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK7:       omp_if.then:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND22:%.*]]
-// CHECK7:       omp.inner.for.cond22:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND21:%.*]]
+// CHECK7:       omp.inner.for.cond21:
 // CHECK7-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
 // CHECK7-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK7-NEXT:    br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
-// CHECK7:       omp.inner.for.body24:
+// CHECK7-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK7-NEXT:    br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
+// CHECK7:       omp.inner.for.body23:
 // CHECK7-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK7-NEXT:    [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK7-NEXT:    store i32 [[ADD26]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK7-NEXT:    [[MUL24:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK7-NEXT:    [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
+// CHECK7-NEXT:    store i32 [[ADD25]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK7-NEXT:    call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE27:%.*]]
-// CHECK7:       omp.body.continue27:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC28:%.*]]
-// CHECK7:       omp.inner.for.inc28:
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE26:%.*]]
+// CHECK7:       omp.body.continue26:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC27:%.*]]
+// CHECK7:       omp.inner.for.inc27:
 // CHECK7-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK7-NEXT:    store i32 [[ADD29]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
-// CHECK7:       omp.inner.for.end30:
+// CHECK7-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK7-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK7:       omp.inner.for.end29:
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK7:       omp_if.else:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND31:%.*]]
-// CHECK7:       omp.inner.for.cond31:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
+// CHECK7:       omp.inner.for.cond30:
 // CHECK7-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
 // CHECK7-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4
-// CHECK7-NEXT:    [[CMP32:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
-// CHECK7-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END39:%.*]]
-// CHECK7:       omp.inner.for.body33:
+// CHECK7-NEXT:    [[CMP31:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// CHECK7-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END38:%.*]]
+// CHECK7:       omp.inner.for.body32:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[TMP19]], 1
-// CHECK7-NEXT:    [[ADD35:%.*]] = add nsw i32 0, [[MUL34]]
-// CHECK7-NEXT:    store i32 [[ADD35]], ptr [[I20]], align 4
+// CHECK7-NEXT:    [[MUL33:%.*]] = mul nsw i32 [[TMP19]], 1
+// CHECK7-NEXT:    [[ADD34:%.*]] = add nsw i32 0, [[MUL33]]
+// CHECK7-NEXT:    store i32 [[ADD34]], ptr [[I20]], align 4
 // CHECK7-NEXT:    call void @_Z3fn6v()
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE36:%.*]]
-// CHECK7:       omp.body.continue36:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC37:%.*]]
-// CHECK7:       omp.inner.for.inc37:
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE35:%.*]]
+// CHECK7:       omp.body.continue35:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC36:%.*]]
+// CHECK7:       omp.inner.for.inc36:
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK7-NEXT:    store i32 [[ADD38]], ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND31]], !llvm.loop [[LOOP17:![0-9]+]]
-// CHECK7:       omp.inner.for.end39:
+// CHECK7-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK7-NEXT:    store i32 [[ADD37]], ptr [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK7:       omp.inner.for.end38:
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
 // CHECK7-NEXT:    store i32 100, ptr [[I20]], align 4
@@ -6582,16 +6582,16 @@ int main() {
 // CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8
-// CHECK11-NEXT:    [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8
+// CHECK11-NEXT:    [[DOTBOUND_ZERO_ADDR16:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 // CHECK11-NEXT:    store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
@@ -6613,29 +6613,29 @@ int main() {
 // CHECK11-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
 // CHECK11-NEXT:    [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK11:       omp.inner.for.cond:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]]
-// CHECK11-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
-// CHECK11-NEXT:    br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK11-NEXT:    [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK11-NEXT:    br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
 // CHECK11-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK11-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK11-NEXT:    [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
+// CHECK11-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK11-NEXT:    [[STOREDV3:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK11-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]]
-// CHECK11:       omp_if.then6:
+// CHECK11-NEXT:    [[LOADEDV4:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11:       omp_if.then5:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK11:       omp_if.else:
@@ -6654,48 +6654,48 @@ int main() {
 // CHECK11-NEXT:    store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
 // CHECK11:       omp.inner.for.end:
-// CHECK11-NEXT:    br label [[OMP_IF_END22:%.*]]
-// CHECK11:       omp_if.else7:
-// CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND8:%.*]]
-// CHECK11:       omp.inner.for.cond8:
+// CHECK11-NEXT:    br label [[OMP_IF_END21:%.*]]
+// CHECK11:       omp_if.else6:
+// CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND7:%.*]]
+// CHECK11:       omp.inner.for.cond7:
 // CHECK11-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT:    [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
-// CHECK11-NEXT:    br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]]
-// CHECK11:       omp.inner.for.body10:
+// CHECK11-NEXT:    [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
+// CHECK11-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END20:%.*]]
+// CHECK11:       omp.inner.for.body9:
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
 // CHECK11-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK11-NEXT:    [[TMP25:%.*]] = zext i32 [[TMP24]] to i64
 // CHECK11-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK11-NEXT:    [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL13]], ptr [[DOTCAPTURE_EXPR__CASTED12]], align 1
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED12]], align 8
+// CHECK11-NEXT:    [[LOADEDV10:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK11-NEXT:    [[STOREDV12:%.*]] = zext i1 [[LOADEDV10]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV12]], ptr [[DOTCAPTURE_EXPR__CASTED11]], align 1
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED11]], align 8
 // CHECK11-NEXT:    [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL14:%.*]] = trunc i8 [[TMP28]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]]
-// CHECK11:       omp_if.then15:
+// CHECK11-NEXT:    [[LOADEDV13:%.*]] = trunc i8 [[TMP28]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV13]], label [[OMP_IF_THEN14:%.*]], label [[OMP_IF_ELSE15:%.*]]
+// CHECK11:       omp_if.then14:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined.1, i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]])
-// CHECK11-NEXT:    br label [[OMP_IF_END18:%.*]]
-// CHECK11:       omp_if.else16:
+// CHECK11-NEXT:    br label [[OMP_IF_END17:%.*]]
+// CHECK11:       omp_if.else15:
 // CHECK11-NEXT:    call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]])
 // CHECK11-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK11-NEXT:    store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined.1(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
+// CHECK11-NEXT:    store i32 0, ptr [[DOTBOUND_ZERO_ADDR16]], align 4
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined.omp_outlined.1(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR16]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]]
 // CHECK11-NEXT:    call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK11-NEXT:    br label [[OMP_IF_END18]]
-// CHECK11:       omp_if.end18:
-// CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC19:%.*]]
-// CHECK11:       omp.inner.for.inc19:
+// CHECK11-NEXT:    br label [[OMP_IF_END17]]
+// CHECK11:       omp_if.end17:
+// CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC18:%.*]]
+// CHECK11:       omp.inner.for.inc18:
 // CHECK11-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK11-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK11-NEXT:    [[ADD20:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
-// CHECK11-NEXT:    store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4
-// CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]]
-// CHECK11:       omp.inner.for.end21:
-// CHECK11-NEXT:    br label [[OMP_IF_END22]]
-// CHECK11:       omp_if.end22:
+// CHECK11-NEXT:    [[ADD19:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
+// CHECK11-NEXT:    store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4
+// CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]]
+// CHECK11:       omp.inner.for.end20:
+// CHECK11-NEXT:    br label [[OMP_IF_END21]]
+// CHECK11:       omp_if.end21:
 // CHECK11-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK11:       omp.loop.exit:
 // CHECK11-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
@@ -6740,8 +6740,8 @@ int main() {
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -6867,8 +6867,8 @@ int main() {
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -7984,60 +7984,60 @@ int main() {
 // CHECK15-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK15-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK15-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK15-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK15-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK15-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK15-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV19]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK15-NEXT:    [[TOBOOL21:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK15-NEXT:    br i1 [[TOBOOL21]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK15-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK15-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK15:       omp_if.then:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND22:%.*]]
-// CHECK15:       omp.inner.for.cond22:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND21:%.*]]
+// CHECK15:       omp.inner.for.cond21:
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
 // CHECK15-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK15-NEXT:    br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
-// CHECK15:       omp.inner.for.body24:
+// CHECK15-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK15-NEXT:    br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
+// CHECK15:       omp.inner.for.body23:
 // CHECK15-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK15-NEXT:    [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK15-NEXT:    store i32 [[ADD26]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK15-NEXT:    [[MUL24:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK15-NEXT:    [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
+// CHECK15-NEXT:    store i32 [[ADD25]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK15-NEXT:    call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE27:%.*]]
-// CHECK15:       omp.body.continue27:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC28:%.*]]
-// CHECK15:       omp.inner.for.inc28:
+// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE26:%.*]]
+// CHECK15:       omp.body.continue26:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC27:%.*]]
+// CHECK15:       omp.inner.for.inc27:
 // CHECK15-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK15-NEXT:    store i32 [[ADD29]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
-// CHECK15:       omp.inner.for.end30:
+// CHECK15-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK15-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK15:       omp.inner.for.end29:
 // CHECK15-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK15:       omp_if.else:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND31:%.*]]
-// CHECK15:       omp.inner.for.cond31:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
+// CHECK15:       omp.inner.for.cond30:
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4
-// CHECK15-NEXT:    [[CMP32:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
-// CHECK15-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END39:%.*]]
-// CHECK15:       omp.inner.for.body33:
+// CHECK15-NEXT:    [[CMP31:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// CHECK15-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END38:%.*]]
+// CHECK15:       omp.inner.for.body32:
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[TMP19]], 1
-// CHECK15-NEXT:    [[ADD35:%.*]] = add nsw i32 0, [[MUL34]]
-// CHECK15-NEXT:    store i32 [[ADD35]], ptr [[I20]], align 4
+// CHECK15-NEXT:    [[MUL33:%.*]] = mul nsw i32 [[TMP19]], 1
+// CHECK15-NEXT:    [[ADD34:%.*]] = add nsw i32 0, [[MUL33]]
+// CHECK15-NEXT:    store i32 [[ADD34]], ptr [[I20]], align 4
 // CHECK15-NEXT:    call void @_Z3fn6v()
-// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE36:%.*]]
-// CHECK15:       omp.body.continue36:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC37:%.*]]
-// CHECK15:       omp.inner.for.inc37:
+// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE35:%.*]]
+// CHECK15:       omp.body.continue35:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC36:%.*]]
+// CHECK15:       omp.inner.for.inc36:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK15-NEXT:    store i32 [[ADD38]], ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND31]], !llvm.loop [[LOOP17:![0-9]+]]
-// CHECK15:       omp.inner.for.end39:
+// CHECK15-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK15-NEXT:    store i32 [[ADD37]], ptr [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK15:       omp.inner.for.end38:
 // CHECK15-NEXT:    br label [[OMP_IF_END]]
 // CHECK15:       omp_if.end:
 // CHECK15-NEXT:    store i32 100, ptr [[I20]], align 4
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
index 0f43f0ac71704..6387946fcf544 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
@@ -66,8 +66,8 @@ struct S {
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED5:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
@@ -88,21 +88,21 @@ struct S {
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main.omp_outlined.1, i64 [[TMP6]])
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_3]], align 1
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1
-// CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1
-// CHECK1-NEXT:    [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP9]] to i1
+// CHECK1-NEXT:    [[STOREDV6:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV6]], ptr [[DOTCAPTURE_EXPR__CASTED5]], align 1
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED5]], align 8
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8
+// CHECK1-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED7]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED7]], align 8
 // CHECK1-NEXT:    [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1
-// CHECK1-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV8:%.*]] = trunc i8 [[TMP13]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @main.omp_outlined.4, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]])
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -199,34 +199,34 @@ struct S {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !14
-// CHECK1-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
-// CHECK1-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !14
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
 // CHECK1-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK1:       omp.inner.for.cond.i:
-// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK1-NEXT:    [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
-// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !14
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
 // CHECK1-NEXT:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
 // CHECK1-NEXT:    br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK1:       omp.inner.for.body.i:
-// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
-// CHECK1-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias !14
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META14]]
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK1-NEXT:    [[ADD2_I:%.*]] = add nsw i32 [[TMP23]], 1
-// CHECK1-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK1-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]]
 // CHECK1:       .omp_outlined..exit:
 // CHECK1-NEXT:    ret i32 0
@@ -310,34 +310,34 @@ struct S {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !31
-// CHECK1-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !31
-// CHECK1-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !31
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META31:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META31]]
+// CHECK1-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]]
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META31]]
 // CHECK1-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !31
+// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]]
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK1:       omp.inner.for.cond.i:
-// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32:![0-9]+]]
+// CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32:![0-9]+]]
 // CHECK1-NEXT:    [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
-// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK1-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK1-NEXT:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
 // CHECK1-NEXT:    br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]]
 // CHECK1:       omp.inner.for.body.i:
-// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
-// CHECK1-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
+// CHECK1-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK1-NEXT:    [[ADD2_I:%.*]] = add nsw i32 [[TMP23]], 1
-// CHECK1-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK1-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]]
 // CHECK1:       .omp_outlined..2.exit:
 // CHECK1-NEXT:    ret i32 0
@@ -420,8 +420,8 @@ struct S {
 // CHECK1-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP24]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP21]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK1-NEXT:    [[TMP27:%.*]] = sext i1 [[TOBOOL]] to i32
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK1-NEXT:    [[TMP27:%.*]] = sext i1 [[LOADEDV]] to i32
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 5
 // CHECK1-NEXT:    store i64 0, ptr [[TMP28]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 6
@@ -505,31 +505,31 @@ struct S {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store i64 [[TMP10]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store i64 [[TMP12]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store i64 [[TMP14]], ptr [[DOTST__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store i32 [[TMP16]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !47
-// CHECK1-NEXT:    store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META47:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store i64 [[TMP10]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store i64 [[TMP12]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store i64 [[TMP14]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store i32 [[TMP16]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META47]]
 // CHECK1-NEXT:    call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47
+// CHECK1-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias [[META47]]
 // CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK1-NEXT:    store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK1-NEXT:    store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8
 // CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// CHECK1-NEXT:    store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK1-NEXT:    store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8
 // CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8
@@ -546,63 +546,63 @@ struct S {
 // CHECK1-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[IDXPROM4_I]]
 // CHECK1-NEXT:    [[TMP40:%.*]] = load i8, ptr [[ARRAYIDX5_I]], align 1
 // CHECK1-NEXT:    [[CONV_I:%.*]] = sext i8 [[TMP40]] to i32
-// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[CONV7_I:%.*]] = sext i32 [[TMP41]] to i64
-// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[SUB8_I:%.*]] = sub i32 [[TMP42]], [[TMP43]]
 // CHECK1-NEXT:    [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1
 // CHECK1-NEXT:    [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64
 // CHECK1-NEXT:    [[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]]
 // CHECK1-NEXT:    [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1
-// CHECK1-NEXT:    store i64 [[SUB12_I]], ptr [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store i32 0, ptr [[I_I]], align 4, !noalias !47
-// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
-// CHECK1-NEXT:    store i32 [[TMP44]], ptr [[J_I]], align 4, !noalias !47
-// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK1-NEXT:    store i64 [[SUB12_I]], ptr [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store i32 0, ptr [[I_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    store i32 [[TMP44]], ptr [[J_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[CMP_I:%.*]] = icmp slt i32 0, [[TMP45]]
 // CHECK1-NEXT:    br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[TASKLOOP_IF_END_I:%.*]]
 // CHECK1:       land.lhs.true.i:
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[CMP13_I:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
 // CHECK1-NEXT:    br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]]
 // CHECK1:       taskloop.if.then.i:
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47
-// CHECK1-NEXT:    store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47
+// CHECK1-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK1-NEXT:    store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
 // CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8
 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2
 // CHECK1-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK1:       omp.inner.for.cond.i:
-// CHECK1-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48:![0-9]+]]
-// CHECK1-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48:![0-9]+]]
+// CHECK1-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    [[CMP16_I:%.*]] = icmp ule i64 [[TMP53]], [[TMP54]]
 // CHECK1-NEXT:    br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]]
 // CHECK1:       omp.inner.for.body.i:
-// CHECK1-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    [[SUB17_I:%.*]] = sub i32 [[TMP56]], [[TMP57]]
 // CHECK1-NEXT:    [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1
 // CHECK1-NEXT:    [[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64
 // CHECK1-NEXT:    [[DIV23_I:%.*]] = sdiv i64 [[TMP55]], [[CONV22_I]]
 // CHECK1-NEXT:    [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32
-// CHECK1-NEXT:    store i32 [[CONV26_I]], ptr [[I14_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    store i32 [[CONV26_I]], ptr [[I14_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    [[CONV27_I:%.*]] = sext i32 [[TMP58]] to i64
-// CHECK1-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    [[SUB28_I:%.*]] = sub i32 [[TMP61]], [[TMP62]]
 // CHECK1-NEXT:    [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1
 // CHECK1-NEXT:    [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64
 // CHECK1-NEXT:    [[DIV34_I:%.*]] = sdiv i64 [[TMP60]], [[CONV33_I]]
-// CHECK1-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    [[SUB35_I:%.*]] = sub i32 [[TMP63]], [[TMP64]]
 // CHECK1-NEXT:    [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1
 // CHECK1-NEXT:    [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64
@@ -610,15 +610,15 @@ struct S {
 // CHECK1-NEXT:    [[SUB42_I:%.*]] = sub nsw i64 [[TMP59]], [[MUL41_I]]
 // CHECK1-NEXT:    [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]]
 // CHECK1-NEXT:    [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32
-// CHECK1-NEXT:    store i32 [[CONV45_I]], ptr [[J15_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK1-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    store i32 [[CONV45_I]], ptr [[J15_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    [[ADD46_I:%.*]] = add nsw i64 [[TMP65]], 1
-// CHECK1-NEXT:    store i64 [[ADD46_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK1-NEXT:    store i64 [[ADD46_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]]
 // CHECK1:       omp.inner.for.end.i:
 // CHECK1-NEXT:    br label [[TASKLOOP_IF_END_I]]
 // CHECK1:       taskloop.if.end.i:
-// CHECK1-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTLITER__ADDR_I]], align 4, !noalias !47
+// CHECK1-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META47]]
 // CHECK1-NEXT:    [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0
 // CHECK1-NEXT:    br i1 [[TMP67]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK1:       .omp.lastprivate.then.i:
@@ -677,12 +677,12 @@ struct S {
 // CHECK1-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK1-NEXT:    [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK1-NEXT:    [[STOREDV2:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_ZN1SC2Ei.omp_outlined, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]])
 // CHECK1-NEXT:    ret void
@@ -720,7 +720,7 @@ struct S {
 // CHECK1-NEXT:    store ptr [[TMP1]], ptr [[TMP7]], align 8
 // CHECK1-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]])
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1
 // CHECK1-NEXT:    store ptr [[TMP]], ptr [[_TMP1]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK1-NEXT:    store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4
@@ -729,7 +729,7 @@ struct S {
 // CHECK1-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK1-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK1-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK1-NEXT:    [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0
+// CHECK1-NEXT:    [[TMP11:%.*]] = select i1 [[LOADEDV]], i32 2, i32 0
 // CHECK1-NEXT:    [[TMP12:%.*]] = or i32 [[TMP11]], 1
 // CHECK1-NEXT:    [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..8)
 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0
@@ -803,54 +803,54 @@ struct S {
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]])
 // CHECK1-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]])
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62
-// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !62
-// CHECK1-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62
-// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META62:![0-9]+]]
+// CHECK1-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META62]]
+// CHECK1-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
 // CHECK1-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK1-NEXT:    store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !62
+// CHECK1-NEXT:    store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias [[META62]]
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK1-NEXT:    store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62
-// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62
+// CHECK1-NEXT:    store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META62]]
+// CHECK1-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META62]]
 // CHECK1-NEXT:    [[SUB3_I:%.*]] = sub nsw i32 [[TMP23]], 1
-// CHECK1-NEXT:    store i32 [[SUB3_I]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !62
-// CHECK1-NEXT:    store ptr [[A_I]], ptr [[TMP4_I]], align 8, !noalias !62
-// CHECK1-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[TMP4_I]], align 8, !noalias !62
+// CHECK1-NEXT:    store i32 [[SUB3_I]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META62]]
+// CHECK1-NEXT:    store ptr [[A_I]], ptr [[TMP4_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[TMP4_I]], align 8, !noalias [[META62]]
 // CHECK1-NEXT:    store i32 0, ptr [[TMP24]], align 4
-// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62
+// CHECK1-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META62]]
 // CHECK1-NEXT:    [[CMP_I:%.*]] = icmp slt i32 0, [[TMP25]]
 // CHECK1-NEXT:    br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK1:       taskloop.if.then.i:
-// CHECK1-NEXT:    store ptr [[A5_I]], ptr [[TMP6_I]], align 8, !noalias !62
-// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !62
+// CHECK1-NEXT:    store ptr [[A5_I]], ptr [[TMP6_I]], align 8, !noalias [[META62]]
+// CHECK1-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META62]]
 // CHECK1-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32
-// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62
+// CHECK1-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]]
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK1:       omp.inner.for.cond.i:
-// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63:![0-9]+]]
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63:![0-9]+]]
 // CHECK1-NEXT:    [[CONV7_I:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK1-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK1-NEXT:    [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP30]]
 // CHECK1-NEXT:    br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]]
 // CHECK1:       omp.inner.for.body.i:
-// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63]]
-// CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP6_I]], align 8, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
+// CHECK1-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP6_I]], align 8, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK1-NEXT:    store i32 [[TMP31]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP63]]
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK1-NEXT:    [[ADD9_I:%.*]] = add nsw i32 [[TMP33]], 1
-// CHECK1-NEXT:    store i32 [[ADD9_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK1-NEXT:    store i32 [[ADD9_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP64:![0-9]+]]
 // CHECK1:       omp.inner.for.end.i:
 // CHECK1-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT]]
@@ -878,8 +878,8 @@ struct S {
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1
 // CHECK2-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR__CASTED5:%.*]] = alloca i64, align 8
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
@@ -900,21 +900,21 @@ struct S {
 // CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main.omp_outlined.1, i64 [[TMP6]])
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
-// CHECK2-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK2-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1
+// CHECK2-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK2-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_3]], align 1
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1
-// CHECK2-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1
-// CHECK2-NEXT:    [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8
-// CHECK2-NEXT:    store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1
-// CHECK2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8
+// CHECK2-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP9]] to i1
+// CHECK2-NEXT:    [[STOREDV6:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK2-NEXT:    store i8 [[STOREDV6]], ptr [[DOTCAPTURE_EXPR__CASTED5]], align 1
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED5]], align 8
 // CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4
-// CHECK2-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8
+// CHECK2-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED7]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED7]], align 8
 // CHECK2-NEXT:    [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1
-// CHECK2-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1
-// CHECK2-NEXT:    br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK2-NEXT:    [[LOADEDV8:%.*]] = trunc i8 [[TMP13]] to i1
+// CHECK2-NEXT:    br i1 [[LOADEDV8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK2:       omp_if.then:
 // CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @main.omp_outlined.4, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]])
 // CHECK2-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -1011,34 +1011,34 @@ struct S {
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
-// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
-// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !14
-// CHECK2-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
-// CHECK2-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !14
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]]
+// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META14]]
+// CHECK2-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
 // CHECK2-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK2:       omp.inner.for.cond.i:
-// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK2-NEXT:    [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
-// CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !14
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
 // CHECK2-NEXT:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
 // CHECK2-NEXT:    br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK2:       omp.inner.for.body.i:
-// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
-// CHECK2-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias !14
-// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK2-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META14]]
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK2-NEXT:    [[ADD2_I:%.*]] = add nsw i32 [[TMP23]], 1
-// CHECK2-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK2-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]]
 // CHECK2:       .omp_outlined..exit:
 // CHECK2-NEXT:    ret i32 0
@@ -1122,34 +1122,34 @@ struct S {
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
-// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
-// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !31
-// CHECK2-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !31
-// CHECK2-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !31
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META31:![0-9]+]]
+// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META31]]
+// CHECK2-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]]
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META31]]
 // CHECK2-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !31
+// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]]
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK2:       omp.inner.for.cond.i:
-// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32:![0-9]+]]
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32:![0-9]+]]
 // CHECK2-NEXT:    [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
-// CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK2-NEXT:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
 // CHECK2-NEXT:    br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]]
 // CHECK2:       omp.inner.for.body.i:
-// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
-// CHECK2-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
-// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
+// CHECK2-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK2-NEXT:    [[ADD2_I:%.*]] = add nsw i32 [[TMP23]], 1
-// CHECK2-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK2-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]]
 // CHECK2:       .omp_outlined..2.exit:
 // CHECK2-NEXT:    ret i32 0
@@ -1232,8 +1232,8 @@ struct S {
 // CHECK2-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP24]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
 // CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP21]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK2-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK2-NEXT:    [[TMP27:%.*]] = sext i1 [[TOBOOL]] to i32
+// CHECK2-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK2-NEXT:    [[TMP27:%.*]] = sext i1 [[LOADEDV]] to i32
 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 5
 // CHECK2-NEXT:    store i64 0, ptr [[TMP28]], align 8
 // CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 6
@@ -1317,31 +1317,31 @@ struct S {
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
-// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47
-// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store i64 [[TMP10]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store i64 [[TMP12]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store i64 [[TMP14]], ptr [[DOTST__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store i32 [[TMP16]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !47
-// CHECK2-NEXT:    store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META47:![0-9]+]]
+// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store i64 [[TMP10]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store i64 [[TMP12]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store i64 [[TMP14]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store i32 [[TMP16]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META47]]
 // CHECK2-NEXT:    call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]]
 // CHECK2-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47
+// CHECK2-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias [[META47]]
 // CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK2-NEXT:    store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK2-NEXT:    store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8
 // CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// CHECK2-NEXT:    store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK2-NEXT:    store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2
 // CHECK2-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8
 // CHECK2-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8
@@ -1358,63 +1358,63 @@ struct S {
 // CHECK2-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[IDXPROM4_I]]
 // CHECK2-NEXT:    [[TMP40:%.*]] = load i8, ptr [[ARRAYIDX5_I]], align 1
 // CHECK2-NEXT:    [[CONV_I:%.*]] = sext i8 [[TMP40]] to i32
-// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[CONV7_I:%.*]] = sext i32 [[TMP41]] to i64
-// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[SUB8_I:%.*]] = sub i32 [[TMP42]], [[TMP43]]
 // CHECK2-NEXT:    [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1
 // CHECK2-NEXT:    [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64
 // CHECK2-NEXT:    [[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]]
 // CHECK2-NEXT:    [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1
-// CHECK2-NEXT:    store i64 [[SUB12_I]], ptr [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store i32 0, ptr [[I_I]], align 4, !noalias !47
-// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
-// CHECK2-NEXT:    store i32 [[TMP44]], ptr [[J_I]], align 4, !noalias !47
-// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK2-NEXT:    store i64 [[SUB12_I]], ptr [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store i32 0, ptr [[I_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    store i32 [[TMP44]], ptr [[J_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[CMP_I:%.*]] = icmp slt i32 0, [[TMP45]]
 // CHECK2-NEXT:    br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[TASKLOOP_IF_END_I:%.*]]
 // CHECK2:       land.lhs.true.i:
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
+// CHECK2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
+// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[CMP13_I:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
 // CHECK2-NEXT:    br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]]
 // CHECK2:       taskloop.if.then.i:
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47
-// CHECK2-NEXT:    store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47
+// CHECK2-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK2-NEXT:    store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
 // CHECK2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8
 // CHECK2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2
 // CHECK2-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK2:       omp.inner.for.cond.i:
-// CHECK2-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48:![0-9]+]]
-// CHECK2-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP53:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48:![0-9]+]]
+// CHECK2-NEXT:    [[TMP54:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    [[CMP16_I:%.*]] = icmp ule i64 [[TMP53]], [[TMP54]]
 // CHECK2-NEXT:    br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]]
 // CHECK2:       omp.inner.for.body.i:
-// CHECK2-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP57:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    [[SUB17_I:%.*]] = sub i32 [[TMP56]], [[TMP57]]
 // CHECK2-NEXT:    [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1
 // CHECK2-NEXT:    [[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64
 // CHECK2-NEXT:    [[DIV23_I:%.*]] = sdiv i64 [[TMP55]], [[CONV22_I]]
 // CHECK2-NEXT:    [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32
-// CHECK2-NEXT:    store i32 [[CONV26_I]], ptr [[I14_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    store i32 [[CONV26_I]], ptr [[I14_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    [[CONV27_I:%.*]] = sext i32 [[TMP58]] to i64
-// CHECK2-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP62:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    [[SUB28_I:%.*]] = sub i32 [[TMP61]], [[TMP62]]
 // CHECK2-NEXT:    [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1
 // CHECK2-NEXT:    [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64
 // CHECK2-NEXT:    [[DIV34_I:%.*]] = sdiv i64 [[TMP60]], [[CONV33_I]]
-// CHECK2-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    [[SUB35_I:%.*]] = sub i32 [[TMP63]], [[TMP64]]
 // CHECK2-NEXT:    [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1
 // CHECK2-NEXT:    [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64
@@ -1422,15 +1422,15 @@ struct S {
 // CHECK2-NEXT:    [[SUB42_I:%.*]] = sub nsw i64 [[TMP59]], [[MUL41_I]]
 // CHECK2-NEXT:    [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]]
 // CHECK2-NEXT:    [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32
-// CHECK2-NEXT:    store i32 [[CONV45_I]], ptr [[J15_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK2-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    store i32 [[CONV45_I]], ptr [[J15_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    [[ADD46_I:%.*]] = add nsw i64 [[TMP65]], 1
-// CHECK2-NEXT:    store i64 [[ADD46_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK2-NEXT:    store i64 [[ADD46_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]]
 // CHECK2:       omp.inner.for.end.i:
 // CHECK2-NEXT:    br label [[TASKLOOP_IF_END_I]]
 // CHECK2:       taskloop.if.end.i:
-// CHECK2-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTLITER__ADDR_I]], align 4, !noalias !47
+// CHECK2-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META47]]
 // CHECK2-NEXT:    [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0
 // CHECK2-NEXT:    br i1 [[TMP67]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK2:       .omp.lastprivate.then.i:
@@ -1489,12 +1489,12 @@ struct S {
 // CHECK2-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK2-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK2-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK2-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK2-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK2-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK2-NEXT:    [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK2-NEXT:    store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK2-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK2-NEXT:    [[STOREDV2:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK2-NEXT:    store i8 [[STOREDV2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK2-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_ZN1SC2Ei.omp_outlined, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]])
 // CHECK2-NEXT:    ret void
@@ -1532,7 +1532,7 @@ struct S {
 // CHECK2-NEXT:    store ptr [[TMP1]], ptr [[TMP7]], align 8
 // CHECK2-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]])
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK2-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1
+// CHECK2-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1
 // CHECK2-NEXT:    store ptr [[TMP]], ptr [[_TMP1]], align 8
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK2-NEXT:    store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4
@@ -1541,7 +1541,7 @@ struct S {
 // CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK2-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK2-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT:    [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0
+// CHECK2-NEXT:    [[TMP11:%.*]] = select i1 [[LOADEDV]], i32 2, i32 0
 // CHECK2-NEXT:    [[TMP12:%.*]] = or i32 [[TMP11]], 1
 // CHECK2-NEXT:    [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..8)
 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0
@@ -1615,54 +1615,54 @@ struct S {
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]])
 // CHECK2-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]])
-// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62
-// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !62
-// CHECK2-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62
-// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62
+// CHECK2-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META62:![0-9]+]]
+// CHECK2-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META62]]
+// CHECK2-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META62]]
 // CHECK2-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK2-NEXT:    store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !62
+// CHECK2-NEXT:    store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias [[META62]]
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8
 // CHECK2-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK2-NEXT:    store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62
-// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62
+// CHECK2-NEXT:    store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META62]]
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META62]]
 // CHECK2-NEXT:    [[SUB3_I:%.*]] = sub nsw i32 [[TMP23]], 1
-// CHECK2-NEXT:    store i32 [[SUB3_I]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !62
-// CHECK2-NEXT:    store ptr [[A_I]], ptr [[TMP4_I]], align 8, !noalias !62
-// CHECK2-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[TMP4_I]], align 8, !noalias !62
+// CHECK2-NEXT:    store i32 [[SUB3_I]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META62]]
+// CHECK2-NEXT:    store ptr [[A_I]], ptr [[TMP4_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[TMP4_I]], align 8, !noalias [[META62]]
 // CHECK2-NEXT:    store i32 0, ptr [[TMP24]], align 4
-// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META62]]
 // CHECK2-NEXT:    [[CMP_I:%.*]] = icmp slt i32 0, [[TMP25]]
 // CHECK2-NEXT:    br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK2:       taskloop.if.then.i:
-// CHECK2-NEXT:    store ptr [[A5_I]], ptr [[TMP6_I]], align 8, !noalias !62
-// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !62
+// CHECK2-NEXT:    store ptr [[A5_I]], ptr [[TMP6_I]], align 8, !noalias [[META62]]
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META62]]
 // CHECK2-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32
-// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62
+// CHECK2-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]]
 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK2:       omp.inner.for.cond.i:
-// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63:![0-9]+]]
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63:![0-9]+]]
 // CHECK2-NEXT:    [[CONV7_I:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK2-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK2-NEXT:    [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP30]]
 // CHECK2-NEXT:    br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]]
 // CHECK2:       omp.inner.for.body.i:
-// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63]]
-// CHECK2-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP6_I]], align 8, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
+// CHECK2-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP6_I]], align 8, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK2-NEXT:    store i32 [[TMP31]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP63]]
-// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK2-NEXT:    [[ADD9_I:%.*]] = add nsw i32 [[TMP33]], 1
-// CHECK2-NEXT:    store i32 [[ADD9_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group [[ACC_GRP63]]
+// CHECK2-NEXT:    store i32 [[ADD9_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META62]], !llvm.access.group [[ACC_GRP63]]
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP64:![0-9]+]]
 // CHECK2:       omp.inner.for.end.i:
 // CHECK2-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT]]
@@ -1690,8 +1690,8 @@ struct S {
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8
-// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8
+// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED5:%.*]] = alloca i64, align 8
+// CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED7:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
@@ -1712,21 +1712,21 @@ struct S {
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @main.omp_outlined.1, i64 [[TMP6]])
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_3]], align 1
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1
-// CHECK3-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1
-// CHECK3-NEXT:    [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1
-// CHECK3-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP9]] to i1
+// CHECK3-NEXT:    [[STOREDV6:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV6]], ptr [[DOTCAPTURE_EXPR__CASTED5]], align 1
+// CHECK3-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED5]], align 8
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK3-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8
+// CHECK3-NEXT:    store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED7]], align 4
+// CHECK3-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED7]], align 8
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1
-// CHECK3-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV8:%.*]] = trunc i8 [[TMP13]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @main.omp_outlined.4, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]])
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -1823,34 +1823,34 @@ struct S {
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
-// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
-// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !14
-// CHECK3-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14
-// CHECK3-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !14
+// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META14:![0-9]+]]
+// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META14]]
+// CHECK3-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]]
+// CHECK3-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META14]]
 // CHECK3-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK3:       omp.inner.for.cond.i:
-// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK3-NEXT:    [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
-// CHECK3-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !14
+// CHECK3-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META14]]
 // CHECK3-NEXT:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
 // CHECK3-NEXT:    br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__EXIT:%.*]]
 // CHECK3:       omp.inner.for.body.i:
-// CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
-// CHECK3-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias !14
-// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
+// CHECK3-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META14]]
+// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK3-NEXT:    [[ADD2_I:%.*]] = add nsw i32 [[TMP23]], 1
-// CHECK3-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !14
+// CHECK3-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META14]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]]
 // CHECK3:       .omp_outlined..exit:
 // CHECK3-NEXT:    ret i32 0
@@ -1934,34 +1934,34 @@ struct S {
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
-// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31
-// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !31
-// CHECK3-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !31
-// CHECK3-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !31
+// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META31:![0-9]+]]
+// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META31]]
+// CHECK3-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]]
+// CHECK3-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META31]]
 // CHECK3-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !31
+// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK3:       omp.inner.for.cond.i:
-// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32:![0-9]+]]
+// CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32:![0-9]+]]
 // CHECK3-NEXT:    [[CONV1_I:%.*]] = sext i32 [[TMP20]] to i64
-// CHECK3-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK3-NEXT:    [[TMP21:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK3-NEXT:    [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP21]]
 // CHECK3-NEXT:    br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]]
 // CHECK3:       omp.inner.for.body.i:
-// CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
-// CHECK3-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
-// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
+// CHECK3-NEXT:    store i32 [[TMP22]], ptr [[I_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
+// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK3-NEXT:    [[ADD2_I:%.*]] = add nsw i32 [[TMP23]], 1
-// CHECK3-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group [[ACC_GRP32]]
+// CHECK3-NEXT:    store i32 [[ADD2_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META31]], !llvm.access.group [[ACC_GRP32]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]]
 // CHECK3:       .omp_outlined..2.exit:
 // CHECK3-NEXT:    ret i32 0
@@ -2008,9 +2008,9 @@ struct S {
 // CHECK3-NEXT:    store ptr [[TMP2]], ptr [[TMP9]], align 8
 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[TMP10]], align 8
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[TMP10]], align 8
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4
 // CHECK3-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]])
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP1]], align 4
@@ -2049,8 +2049,8 @@ struct S {
 // CHECK3-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP26]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false)
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP23]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL16:%.*]] = trunc i8 [[TMP28]] to i1
-// CHECK3-NEXT:    [[TMP29:%.*]] = sext i1 [[TOBOOL16]] to i32
+// CHECK3-NEXT:    [[LOADEDV16:%.*]] = trunc i8 [[TMP28]] to i1
+// CHECK3-NEXT:    [[TMP29:%.*]] = sext i1 [[LOADEDV16]] to i32
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 5
 // CHECK3-NEXT:    store i64 0, ptr [[TMP30]], align 8
 // CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 6
@@ -2134,31 +2134,31 @@ struct S {
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]])
-// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47
-// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store i64 [[TMP10]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store i64 [[TMP12]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store i64 [[TMP14]], ptr [[DOTST__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store i32 [[TMP16]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !47
-// CHECK3-NEXT:    store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47
+// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META47:![0-9]+]]
+// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store i64 [[TMP10]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store i64 [[TMP12]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store i64 [[TMP14]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store i32 [[TMP16]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META47]]
 // CHECK3-NEXT:    call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]]
 // CHECK3-NEXT:    [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK3-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47
+// CHECK3-NEXT:    [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias [[META47]]
 // CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK3-NEXT:    store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK3-NEXT:    store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8
 // CHECK3-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// CHECK3-NEXT:    store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK3-NEXT:    store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2
 // CHECK3-NEXT:    [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8
 // CHECK3-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8
@@ -2175,68 +2175,68 @@ struct S {
 // CHECK3-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[IDXPROM4_I]]
 // CHECK3-NEXT:    [[TMP40:%.*]] = load i8, ptr [[ARRAYIDX5_I]], align 1
 // CHECK3-NEXT:    [[CONV_I:%.*]] = sext i8 [[TMP40]] to i32
-// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[CONV7_I:%.*]] = sext i32 [[TMP41]] to i64
-// CHECK3-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK3-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[SUB8_I:%.*]] = sub i32 [[TMP42]], [[TMP43]]
 // CHECK3-NEXT:    [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1
 // CHECK3-NEXT:    [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64
 // CHECK3-NEXT:    [[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]]
 // CHECK3-NEXT:    [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1
-// CHECK3-NEXT:    store i64 [[SUB12_I]], ptr [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store i32 0, ptr [[I_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
-// CHECK3-NEXT:    store i32 [[TMP44]], ptr [[J_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47
+// CHECK3-NEXT:    store i64 [[SUB12_I]], ptr [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store i32 0, ptr [[I_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    store i32 [[TMP44]], ptr [[J_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[CMP_I:%.*]] = icmp slt i32 0, [[TMP45]]
 // CHECK3-NEXT:    br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[TASKLOOP_IF_END_I:%.*]]
 // CHECK3:       land.lhs.true.i:
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
+// CHECK3-NEXT:    [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[CMP13_I:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]]
 // CHECK3-NEXT:    br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]]
 // CHECK3:       taskloop.if.then.i:
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47
-// CHECK3-NEXT:    store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47
+// CHECK3-NEXT:    [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
 // CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8
 // CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2
 // CHECK3-NEXT:    [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8
 // CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
-// CHECK3-NEXT:    [[TOBOOL_I:%.*]] = trunc i8 [[TMP54]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL_I]], label [[OMP_IF_THEN_I:%.*]], label [[OMP_IF_ELSE_I:%.*]]
+// CHECK3-NEXT:    [[LOADEDV_I:%.*]] = trunc i8 [[TMP54]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV_I]], label [[OMP_IF_THEN_I:%.*]], label [[OMP_IF_ELSE_I:%.*]]
 // CHECK3:       omp_if.then.i:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK3:       omp.inner.for.cond.i:
-// CHECK3-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48:![0-9]+]]
-// CHECK3-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP55:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48:![0-9]+]]
+// CHECK3-NEXT:    [[TMP56:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    [[CMP16_I:%.*]] = icmp ule i64 [[TMP55]], [[TMP56]]
 // CHECK3-NEXT:    br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]]
 // CHECK3:       omp.inner.for.body.i:
-// CHECK3-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    [[SUB17_I:%.*]] = sub i32 [[TMP58]], [[TMP59]]
 // CHECK3-NEXT:    [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1
 // CHECK3-NEXT:    [[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64
 // CHECK3-NEXT:    [[DIV23_I:%.*]] = sdiv i64 [[TMP57]], [[CONV22_I]]
 // CHECK3-NEXT:    [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32
-// CHECK3-NEXT:    store i32 [[CONV26_I]], ptr [[I14_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    store i32 [[CONV26_I]], ptr [[I14_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    [[CONV27_I:%.*]] = sext i32 [[TMP60]] to i64
-// CHECK3-NEXT:    [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP63:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    [[SUB28_I:%.*]] = sub i32 [[TMP63]], [[TMP64]]
 // CHECK3-NEXT:    [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1
 // CHECK3-NEXT:    [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64
 // CHECK3-NEXT:    [[DIV34_I:%.*]] = sdiv i64 [[TMP62]], [[CONV33_I]]
-// CHECK3-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP65:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP66:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    [[SUB35_I:%.*]] = sub i32 [[TMP65]], [[TMP66]]
 // CHECK3-NEXT:    [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1
 // CHECK3-NEXT:    [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64
@@ -2244,42 +2244,42 @@ struct S {
 // CHECK3-NEXT:    [[SUB42_I:%.*]] = sub nsw i64 [[TMP61]], [[MUL41_I]]
 // CHECK3-NEXT:    [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]]
 // CHECK3-NEXT:    [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32
-// CHECK3-NEXT:    store i32 [[CONV45_I]], ptr [[J15_I]], align 4, !noalias !47, !llvm.access.group [[ACC_GRP48]]
-// CHECK3-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    store i32 [[CONV45_I]], ptr [[J15_I]], align 4, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    [[ADD46_I:%.*]] = add nsw i64 [[TMP67]], 1
-// CHECK3-NEXT:    store i64 [[ADD46_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group [[ACC_GRP48]]
+// CHECK3-NEXT:    store i64 [[ADD46_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]], !llvm.access.group [[ACC_GRP48]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]]
 // CHECK3:       omp.inner.for.end.i:
 // CHECK3-NEXT:    br label [[OMP_IF_END_I:%.*]]
 // CHECK3:       omp_if.else.i:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND47_I:%.*]]
 // CHECK3:       omp.inner.for.cond47.i:
-// CHECK3-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP69:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !47
+// CHECK3-NEXT:    [[TMP68:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP69:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META47]]
 // CHECK3-NEXT:    [[CMP48_I:%.*]] = icmp ule i64 [[TMP68]], [[TMP69]]
 // CHECK3-NEXT:    br i1 [[CMP48_I]], label [[OMP_INNER_FOR_BODY49_I:%.*]], label [[OMP_INNER_FOR_END82_I:%.*]]
 // CHECK3:       omp.inner.for.body49.i:
-// CHECK3-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP72:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK3-NEXT:    [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP71:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP72:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[SUB50_I:%.*]] = sub i32 [[TMP71]], [[TMP72]]
 // CHECK3-NEXT:    [[SUB51_I:%.*]] = sub i32 [[SUB50_I]], 1
 // CHECK3-NEXT:    [[CONV55_I:%.*]] = zext i32 [[SUB50_I]] to i64
 // CHECK3-NEXT:    [[DIV56_I:%.*]] = sdiv i64 [[TMP70]], [[CONV55_I]]
 // CHECK3-NEXT:    [[CONV59_I:%.*]] = trunc i64 [[DIV56_I]] to i32
-// CHECK3-NEXT:    store i32 [[CONV59_I]], ptr [[I14_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP73:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK3-NEXT:    store i32 [[CONV59_I]], ptr [[I14_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP73:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[CONV60_I:%.*]] = sext i32 [[TMP73]] to i64
-// CHECK3-NEXT:    [[TMP74:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP75:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47
-// CHECK3-NEXT:    [[TMP76:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP77:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK3-NEXT:    [[TMP74:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP75:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP76:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP77:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[SUB61_I:%.*]] = sub i32 [[TMP76]], [[TMP77]]
 // CHECK3-NEXT:    [[SUB62_I:%.*]] = sub i32 [[SUB61_I]], 1
 // CHECK3-NEXT:    [[CONV66_I:%.*]] = zext i32 [[SUB61_I]] to i64
 // CHECK3-NEXT:    [[DIV67_I:%.*]] = sdiv i64 [[TMP75]], [[CONV66_I]]
-// CHECK3-NEXT:    [[TMP78:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP79:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47
+// CHECK3-NEXT:    [[TMP78:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP79:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[SUB68_I:%.*]] = sub i32 [[TMP78]], [[TMP79]]
 // CHECK3-NEXT:    [[SUB69_I:%.*]] = sub i32 [[SUB68_I]], 1
 // CHECK3-NEXT:    [[CONV73_I:%.*]] = zext i32 [[SUB68_I]] to i64
@@ -2287,17 +2287,17 @@ struct S {
 // CHECK3-NEXT:    [[SUB75_I:%.*]] = sub nsw i64 [[TMP74]], [[MUL74_I]]
 // CHECK3-NEXT:    [[ADD77_I:%.*]] = add nsw i64 [[CONV60_I]], [[SUB75_I]]
 // CHECK3-NEXT:    [[CONV78_I:%.*]] = trunc i64 [[ADD77_I]] to i32
-// CHECK3-NEXT:    store i32 [[CONV78_I]], ptr [[J15_I]], align 4, !noalias !47
-// CHECK3-NEXT:    [[TMP80:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias !47
+// CHECK3-NEXT:    store i32 [[CONV78_I]], ptr [[J15_I]], align 4, !noalias [[META47]]
+// CHECK3-NEXT:    [[TMP80:%.*]] = load i64, ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
 // CHECK3-NEXT:    [[ADD81_I:%.*]] = add nsw i64 [[TMP80]], 1
-// CHECK3-NEXT:    store i64 [[ADD81_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47
+// CHECK3-NEXT:    store i64 [[ADD81_I]], ptr [[DOTOMP_IV_I]], align 8, !noalias [[META47]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND47_I]], !llvm.loop [[LOOP51:![0-9]+]]
 // CHECK3:       omp.inner.for.end82.i:
 // CHECK3-NEXT:    br label [[OMP_IF_END_I]]
 // CHECK3:       omp_if.end.i:
 // CHECK3-NEXT:    br label [[TASKLOOP_IF_END_I]]
 // CHECK3:       taskloop.if.end.i:
-// CHECK3-NEXT:    [[TMP81:%.*]] = load i32, ptr [[DOTLITER__ADDR_I]], align 4, !noalias !47
+// CHECK3-NEXT:    [[TMP81:%.*]] = load i32, ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META47]]
 // CHECK3-NEXT:    [[TMP82:%.*]] = icmp ne i32 [[TMP81]], 0
 // CHECK3-NEXT:    br i1 [[TMP82]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]]
 // CHECK3:       .omp.lastprivate.then.i:
@@ -2356,12 +2356,12 @@ struct S {
 // CHECK3-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK3-NEXT:    [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK3-NEXT:    [[STOREDV2:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @_ZN1SC2Ei.omp_outlined, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]])
 // CHECK3-NEXT:    ret void
@@ -2399,7 +2399,7 @@ struct S {
 // CHECK3-NEXT:    store ptr [[TMP1]], ptr [[TMP7]], align 8
 // CHECK3-NEXT:    call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]])
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1
 // CHECK3-NEXT:    store ptr [[TMP]], ptr [[_TMP1]], align 8
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4
 // CHECK3-NEXT:    store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4
@@ -2408,7 +2408,7 @@ struct S {
 // CHECK3-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK3-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK3-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK3-NEXT:    [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0
+// CHECK3-NEXT:    [[TMP11:%.*]] = select i1 [[LOADEDV]], i32 2, i32 0
 // CHECK3-NEXT:    [[TMP12:%.*]] = or i32 [[TMP11]], 1
 // CHECK3-NEXT:    [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..8)
 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0
@@ -2482,54 +2482,54 @@ struct S {
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]])
 // CHECK3-NEXT:    call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]])
-// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64
-// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias !64
-// CHECK3-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64
-// CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64
+// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias [[META64:![0-9]+]]
+// CHECK3-NEXT:    store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store i64 [[TMP9]], ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store i64 [[TMP11]], ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store i64 [[TMP13]], ptr [[DOTST__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store i32 [[TMP15]], ptr [[DOTLITER__ADDR_I]], align 4, !noalias [[META64]]
+// CHECK3-NEXT:    store ptr [[TMP17]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META64]]
 // CHECK3-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK3-NEXT:    store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !64
+// CHECK3-NEXT:    store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias [[META64]]
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64
-// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64
+// CHECK3-NEXT:    store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META64]]
+// CHECK3-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META64]]
 // CHECK3-NEXT:    [[SUB3_I:%.*]] = sub nsw i32 [[TMP23]], 1
-// CHECK3-NEXT:    store i32 [[SUB3_I]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !64
-// CHECK3-NEXT:    store ptr [[A_I]], ptr [[TMP4_I]], align 8, !noalias !64
-// CHECK3-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[TMP4_I]], align 8, !noalias !64
+// CHECK3-NEXT:    store i32 [[SUB3_I]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias [[META64]]
+// CHECK3-NEXT:    store ptr [[A_I]], ptr [[TMP4_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    [[TMP24:%.*]] = load ptr, ptr [[TMP4_I]], align 8, !noalias [[META64]]
 // CHECK3-NEXT:    store i32 0, ptr [[TMP24]], align 4
-// CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64
+// CHECK3-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias [[META64]]
 // CHECK3-NEXT:    [[CMP_I:%.*]] = icmp slt i32 0, [[TMP25]]
 // CHECK3-NEXT:    br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__7_EXIT:%.*]]
 // CHECK3:       taskloop.if.then.i:
-// CHECK3-NEXT:    store ptr [[A5_I]], ptr [[TMP6_I]], align 8, !noalias !64
-// CHECK3-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !64
+// CHECK3-NEXT:    store ptr [[A5_I]], ptr [[TMP6_I]], align 8, !noalias [[META64]]
+// CHECK3-NEXT:    [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias [[META64]]
 // CHECK3-NEXT:    [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32
-// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !64
+// CHECK3-NEXT:    store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META64]]
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I:%.*]]
 // CHECK3:       omp.inner.for.cond.i:
-// CHECK3-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group [[ACC_GRP65:![0-9]+]]
+// CHECK3-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META64]], !llvm.access.group [[ACC_GRP65:![0-9]+]]
 // CHECK3-NEXT:    [[CONV7_I:%.*]] = sext i32 [[TMP29]] to i64
-// CHECK3-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias !64, !llvm.access.group [[ACC_GRP65]]
+// CHECK3-NEXT:    [[TMP30:%.*]] = load i64, ptr [[DOTUB__ADDR_I]], align 8, !noalias [[META64]], !llvm.access.group [[ACC_GRP65]]
 // CHECK3-NEXT:    [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP30]]
 // CHECK3-NEXT:    br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]]
 // CHECK3:       omp.inner.for.body.i:
-// CHECK3-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group [[ACC_GRP65]]
-// CHECK3-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP6_I]], align 8, !noalias !64, !llvm.access.group [[ACC_GRP65]]
+// CHECK3-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META64]], !llvm.access.group [[ACC_GRP65]]
+// CHECK3-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP6_I]], align 8, !noalias [[META64]], !llvm.access.group [[ACC_GRP65]]
 // CHECK3-NEXT:    store i32 [[TMP31]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP65]]
-// CHECK3-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group [[ACC_GRP65]]
+// CHECK3-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias [[META64]], !llvm.access.group [[ACC_GRP65]]
 // CHECK3-NEXT:    [[ADD9_I:%.*]] = add nsw i32 [[TMP33]], 1
-// CHECK3-NEXT:    store i32 [[ADD9_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group [[ACC_GRP65]]
+// CHECK3-NEXT:    store i32 [[ADD9_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias [[META64]], !llvm.access.group [[ACC_GRP65]]
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP66:![0-9]+]]
 // CHECK3:       omp.inner.for.end.i:
 // CHECK3-NEXT:    br label [[DOTOMP_OUTLINED__7_EXIT]]
@@ -2641,8 +2641,8 @@ struct S {
 // CHECK5-NEXT:    store i32 10, ptr [[I9]], align 4
 // CHECK5-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK5-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK5-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_21]], align 1
+// CHECK5-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK5-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_21]], align 1
 // CHECK5-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK5-NEXT:    store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_22]], align 4
 // CHECK5-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -2815,8 +2815,8 @@ struct S {
 // CHECK5-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK5-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK5-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK5-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK5-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK5-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    store ptr [[TMP]], ptr [[_TMP2]], align 8
 // CHECK5-NEXT:    [[TMP1:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK5-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_3]], align 4
@@ -2980,8 +2980,8 @@ struct S {
 // CHECK6-NEXT:    store i32 10, ptr [[I9]], align 4
 // CHECK6-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK6-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK6-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK6-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_21]], align 1
+// CHECK6-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK6-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_21]], align 1
 // CHECK6-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK6-NEXT:    store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_22]], align 4
 // CHECK6-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -3154,8 +3154,8 @@ struct S {
 // CHECK6-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK6-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK6-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK6-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK6-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK6-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK6-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK6-NEXT:    store ptr [[TMP]], ptr [[_TMP2]], align 8
 // CHECK6-NEXT:    [[TMP1:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK6-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_3]], align 4
@@ -3319,8 +3319,8 @@ struct S {
 // CHECK7-NEXT:    store i32 10, ptr [[I9]], align 4
 // CHECK7-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_21]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_21]], align 1
 // CHECK7-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK7-NEXT:    store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_22]], align 4
 // CHECK7-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -3372,143 +3372,143 @@ struct S {
 // CHECK7-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
 // CHECK7-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP30]], i64 8) ]
 // CHECK7-NEXT:    [[TMP31:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_21]], align 1
-// CHECK7-NEXT:    [[TOBOOL48:%.*]] = trunc i8 [[TMP31]] to i1
-// CHECK7-NEXT:    br i1 [[TOBOOL48]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK7-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP31]] to i1
+// CHECK7-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK7:       omp_if.then:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND49:%.*]]
-// CHECK7:       omp.inner.for.cond49:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND48:%.*]]
+// CHECK7:       omp.inner.for.cond48:
 // CHECK7-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]]
 // CHECK7-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_UB41]], align 8, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    [[CMP50:%.*]] = icmp ule i64 [[TMP32]], [[TMP33]]
-// CHECK7-NEXT:    br i1 [[CMP50]], label [[OMP_INNER_FOR_BODY51:%.*]], label [[OMP_INNER_FOR_END84:%.*]]
-// CHECK7:       omp.inner.for.body51:
+// CHECK7-NEXT:    [[CMP49:%.*]] = icmp ule i64 [[TMP32]], [[TMP33]]
+// CHECK7-NEXT:    br i1 [[CMP49]], label [[OMP_INNER_FOR_BODY50:%.*]], label [[OMP_INNER_FOR_END83:%.*]]
+// CHECK7:       omp.inner.for.body50:
 // CHECK7-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    [[SUB52:%.*]] = sub i32 [[TMP35]], [[TMP36]]
-// CHECK7-NEXT:    [[SUB53:%.*]] = sub i32 [[SUB52]], 1
-// CHECK7-NEXT:    [[ADD54:%.*]] = add i32 [[SUB53]], 1
-// CHECK7-NEXT:    [[DIV55:%.*]] = udiv i32 [[ADD54]], 1
-// CHECK7-NEXT:    [[MUL56:%.*]] = mul i32 1, [[DIV55]]
-// CHECK7-NEXT:    [[CONV57:%.*]] = zext i32 [[MUL56]] to i64
-// CHECK7-NEXT:    [[DIV58:%.*]] = sdiv i64 [[TMP34]], [[CONV57]]
-// CHECK7-NEXT:    [[MUL59:%.*]] = mul nsw i64 [[DIV58]], 1
-// CHECK7-NEXT:    [[ADD60:%.*]] = add nsw i64 0, [[MUL59]]
-// CHECK7-NEXT:    [[CONV61:%.*]] = trunc i64 [[ADD60]] to i32
-// CHECK7-NEXT:    store i32 [[CONV61]], ptr [[I46]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK7-NEXT:    [[SUB51:%.*]] = sub i32 [[TMP35]], [[TMP36]]
+// CHECK7-NEXT:    [[SUB52:%.*]] = sub i32 [[SUB51]], 1
+// CHECK7-NEXT:    [[ADD53:%.*]] = add i32 [[SUB52]], 1
+// CHECK7-NEXT:    [[DIV54:%.*]] = udiv i32 [[ADD53]], 1
+// CHECK7-NEXT:    [[MUL55:%.*]] = mul i32 1, [[DIV54]]
+// CHECK7-NEXT:    [[CONV56:%.*]] = zext i32 [[MUL55]] to i64
+// CHECK7-NEXT:    [[DIV57:%.*]] = sdiv i64 [[TMP34]], [[CONV56]]
+// CHECK7-NEXT:    [[MUL58:%.*]] = mul nsw i64 [[DIV57]], 1
+// CHECK7-NEXT:    [[ADD59:%.*]] = add nsw i64 0, [[MUL58]]
+// CHECK7-NEXT:    [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK7-NEXT:    store i32 [[CONV60]], ptr [[I46]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    [[CONV62:%.*]] = sext i32 [[TMP37]] to i64
+// CHECK7-NEXT:    [[CONV61:%.*]] = sext i32 [[TMP37]] to i64
 // CHECK7-NEXT:    [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP39:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    [[SUB63:%.*]] = sub i32 [[TMP40]], [[TMP41]]
-// CHECK7-NEXT:    [[SUB64:%.*]] = sub i32 [[SUB63]], 1
-// CHECK7-NEXT:    [[ADD65:%.*]] = add i32 [[SUB64]], 1
-// CHECK7-NEXT:    [[DIV66:%.*]] = udiv i32 [[ADD65]], 1
-// CHECK7-NEXT:    [[MUL67:%.*]] = mul i32 1, [[DIV66]]
-// CHECK7-NEXT:    [[CONV68:%.*]] = zext i32 [[MUL67]] to i64
-// CHECK7-NEXT:    [[DIV69:%.*]] = sdiv i64 [[TMP39]], [[CONV68]]
+// CHECK7-NEXT:    [[SUB62:%.*]] = sub i32 [[TMP40]], [[TMP41]]
+// CHECK7-NEXT:    [[SUB63:%.*]] = sub i32 [[SUB62]], 1
+// CHECK7-NEXT:    [[ADD64:%.*]] = add i32 [[SUB63]], 1
+// CHECK7-NEXT:    [[DIV65:%.*]] = udiv i32 [[ADD64]], 1
+// CHECK7-NEXT:    [[MUL66:%.*]] = mul i32 1, [[DIV65]]
+// CHECK7-NEXT:    [[CONV67:%.*]] = zext i32 [[MUL66]] to i64
+// CHECK7-NEXT:    [[DIV68:%.*]] = sdiv i64 [[TMP39]], [[CONV67]]
 // CHECK7-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK7-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    [[SUB70:%.*]] = sub i32 [[TMP42]], [[TMP43]]
-// CHECK7-NEXT:    [[SUB71:%.*]] = sub i32 [[SUB70]], 1
-// CHECK7-NEXT:    [[ADD72:%.*]] = add i32 [[SUB71]], 1
-// CHECK7-NEXT:    [[DIV73:%.*]] = udiv i32 [[ADD72]], 1
-// CHECK7-NEXT:    [[MUL74:%.*]] = mul i32 1, [[DIV73]]
-// CHECK7-NEXT:    [[CONV75:%.*]] = zext i32 [[MUL74]] to i64
-// CHECK7-NEXT:    [[MUL76:%.*]] = mul nsw i64 [[DIV69]], [[CONV75]]
-// CHECK7-NEXT:    [[SUB77:%.*]] = sub nsw i64 [[TMP38]], [[MUL76]]
-// CHECK7-NEXT:    [[MUL78:%.*]] = mul nsw i64 [[SUB77]], 1
-// CHECK7-NEXT:    [[ADD79:%.*]] = add nsw i64 [[CONV62]], [[MUL78]]
-// CHECK7-NEXT:    [[CONV80:%.*]] = trunc i64 [[ADD79]] to i32
-// CHECK7-NEXT:    store i32 [[CONV80]], ptr [[J47]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE81:%.*]]
-// CHECK7:       omp.body.continue81:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC82:%.*]]
-// CHECK7:       omp.inner.for.inc82:
+// CHECK7-NEXT:    [[SUB69:%.*]] = sub i32 [[TMP42]], [[TMP43]]
+// CHECK7-NEXT:    [[SUB70:%.*]] = sub i32 [[SUB69]], 1
+// CHECK7-NEXT:    [[ADD71:%.*]] = add i32 [[SUB70]], 1
+// CHECK7-NEXT:    [[DIV72:%.*]] = udiv i32 [[ADD71]], 1
+// CHECK7-NEXT:    [[MUL73:%.*]] = mul i32 1, [[DIV72]]
+// CHECK7-NEXT:    [[CONV74:%.*]] = zext i32 [[MUL73]] to i64
+// CHECK7-NEXT:    [[MUL75:%.*]] = mul nsw i64 [[DIV68]], [[CONV74]]
+// CHECK7-NEXT:    [[SUB76:%.*]] = sub nsw i64 [[TMP38]], [[MUL75]]
+// CHECK7-NEXT:    [[MUL77:%.*]] = mul nsw i64 [[SUB76]], 1
+// CHECK7-NEXT:    [[ADD78:%.*]] = add nsw i64 [[CONV61]], [[MUL77]]
+// CHECK7-NEXT:    [[CONV79:%.*]] = trunc i64 [[ADD78]] to i32
+// CHECK7-NEXT:    store i32 [[CONV79]], ptr [[J47]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE80:%.*]]
+// CHECK7:       omp.body.continue80:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC81:%.*]]
+// CHECK7:       omp.inner.for.inc81:
 // CHECK7-NEXT:    [[TMP44:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    [[ADD83:%.*]] = add nsw i64 [[TMP44]], 1
-// CHECK7-NEXT:    store i64 [[ADD83]], ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND49]], !llvm.loop [[LOOP10:![0-9]+]]
-// CHECK7:       omp.inner.for.end84:
+// CHECK7-NEXT:    [[ADD82:%.*]] = add nsw i64 [[TMP44]], 1
+// CHECK7-NEXT:    store i64 [[ADD82]], ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND48]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK7:       omp.inner.for.end83:
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK7:       omp_if.else:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND85:%.*]]
-// CHECK7:       omp.inner.for.cond85:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND84:%.*]]
+// CHECK7:       omp.inner.for.cond84:
 // CHECK7-NEXT:    [[TMP45:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK7-NEXT:    [[TMP46:%.*]] = load i64, ptr [[DOTOMP_UB41]], align 8
-// CHECK7-NEXT:    [[CMP86:%.*]] = icmp ule i64 [[TMP45]], [[TMP46]]
-// CHECK7-NEXT:    br i1 [[CMP86]], label [[OMP_INNER_FOR_BODY87:%.*]], label [[OMP_INNER_FOR_END120:%.*]]
-// CHECK7:       omp.inner.for.body87:
+// CHECK7-NEXT:    [[CMP85:%.*]] = icmp ule i64 [[TMP45]], [[TMP46]]
+// CHECK7-NEXT:    br i1 [[CMP85]], label [[OMP_INNER_FOR_BODY86:%.*]], label [[OMP_INNER_FOR_END119:%.*]]
+// CHECK7:       omp.inner.for.body86:
 // CHECK7-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK7-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK7-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK7-NEXT:    [[SUB88:%.*]] = sub i32 [[TMP48]], [[TMP49]]
-// CHECK7-NEXT:    [[SUB89:%.*]] = sub i32 [[SUB88]], 1
-// CHECK7-NEXT:    [[ADD90:%.*]] = add i32 [[SUB89]], 1
-// CHECK7-NEXT:    [[DIV91:%.*]] = udiv i32 [[ADD90]], 1
-// CHECK7-NEXT:    [[MUL92:%.*]] = mul i32 1, [[DIV91]]
-// CHECK7-NEXT:    [[CONV93:%.*]] = zext i32 [[MUL92]] to i64
-// CHECK7-NEXT:    [[DIV94:%.*]] = sdiv i64 [[TMP47]], [[CONV93]]
-// CHECK7-NEXT:    [[MUL95:%.*]] = mul nsw i64 [[DIV94]], 1
-// CHECK7-NEXT:    [[ADD96:%.*]] = add nsw i64 0, [[MUL95]]
-// CHECK7-NEXT:    [[CONV97:%.*]] = trunc i64 [[ADD96]] to i32
-// CHECK7-NEXT:    store i32 [[CONV97]], ptr [[I46]], align 4
+// CHECK7-NEXT:    [[SUB87:%.*]] = sub i32 [[TMP48]], [[TMP49]]
+// CHECK7-NEXT:    [[SUB88:%.*]] = sub i32 [[SUB87]], 1
+// CHECK7-NEXT:    [[ADD89:%.*]] = add i32 [[SUB88]], 1
+// CHECK7-NEXT:    [[DIV90:%.*]] = udiv i32 [[ADD89]], 1
+// CHECK7-NEXT:    [[MUL91:%.*]] = mul i32 1, [[DIV90]]
+// CHECK7-NEXT:    [[CONV92:%.*]] = zext i32 [[MUL91]] to i64
+// CHECK7-NEXT:    [[DIV93:%.*]] = sdiv i64 [[TMP47]], [[CONV92]]
+// CHECK7-NEXT:    [[MUL94:%.*]] = mul nsw i64 [[DIV93]], 1
+// CHECK7-NEXT:    [[ADD95:%.*]] = add nsw i64 0, [[MUL94]]
+// CHECK7-NEXT:    [[CONV96:%.*]] = trunc i64 [[ADD95]] to i32
+// CHECK7-NEXT:    store i32 [[CONV96]], ptr [[I46]], align 4
 // CHECK7-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK7-NEXT:    [[CONV98:%.*]] = sext i32 [[TMP50]] to i64
+// CHECK7-NEXT:    [[CONV97:%.*]] = sext i32 [[TMP50]] to i64
 // CHECK7-NEXT:    [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK7-NEXT:    [[TMP52:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK7-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK7-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK7-NEXT:    [[SUB99:%.*]] = sub i32 [[TMP53]], [[TMP54]]
-// CHECK7-NEXT:    [[SUB100:%.*]] = sub i32 [[SUB99]], 1
-// CHECK7-NEXT:    [[ADD101:%.*]] = add i32 [[SUB100]], 1
-// CHECK7-NEXT:    [[DIV102:%.*]] = udiv i32 [[ADD101]], 1
-// CHECK7-NEXT:    [[MUL103:%.*]] = mul i32 1, [[DIV102]]
-// CHECK7-NEXT:    [[CONV104:%.*]] = zext i32 [[MUL103]] to i64
-// CHECK7-NEXT:    [[DIV105:%.*]] = sdiv i64 [[TMP52]], [[CONV104]]
+// CHECK7-NEXT:    [[SUB98:%.*]] = sub i32 [[TMP53]], [[TMP54]]
+// CHECK7-NEXT:    [[SUB99:%.*]] = sub i32 [[SUB98]], 1
+// CHECK7-NEXT:    [[ADD100:%.*]] = add i32 [[SUB99]], 1
+// CHECK7-NEXT:    [[DIV101:%.*]] = udiv i32 [[ADD100]], 1
+// CHECK7-NEXT:    [[MUL102:%.*]] = mul i32 1, [[DIV101]]
+// CHECK7-NEXT:    [[CONV103:%.*]] = zext i32 [[MUL102]] to i64
+// CHECK7-NEXT:    [[DIV104:%.*]] = sdiv i64 [[TMP52]], [[CONV103]]
 // CHECK7-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK7-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK7-NEXT:    [[SUB106:%.*]] = sub i32 [[TMP55]], [[TMP56]]
-// CHECK7-NEXT:    [[SUB107:%.*]] = sub i32 [[SUB106]], 1
-// CHECK7-NEXT:    [[ADD108:%.*]] = add i32 [[SUB107]], 1
-// CHECK7-NEXT:    [[DIV109:%.*]] = udiv i32 [[ADD108]], 1
-// CHECK7-NEXT:    [[MUL110:%.*]] = mul i32 1, [[DIV109]]
-// CHECK7-NEXT:    [[CONV111:%.*]] = zext i32 [[MUL110]] to i64
-// CHECK7-NEXT:    [[MUL112:%.*]] = mul nsw i64 [[DIV105]], [[CONV111]]
-// CHECK7-NEXT:    [[SUB113:%.*]] = sub nsw i64 [[TMP51]], [[MUL112]]
-// CHECK7-NEXT:    [[MUL114:%.*]] = mul nsw i64 [[SUB113]], 1
-// CHECK7-NEXT:    [[ADD115:%.*]] = add nsw i64 [[CONV98]], [[MUL114]]
-// CHECK7-NEXT:    [[CONV116:%.*]] = trunc i64 [[ADD115]] to i32
-// CHECK7-NEXT:    store i32 [[CONV116]], ptr [[J47]], align 4
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE117:%.*]]
-// CHECK7:       omp.body.continue117:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC118:%.*]]
-// CHECK7:       omp.inner.for.inc118:
+// CHECK7-NEXT:    [[SUB105:%.*]] = sub i32 [[TMP55]], [[TMP56]]
+// CHECK7-NEXT:    [[SUB106:%.*]] = sub i32 [[SUB105]], 1
+// CHECK7-NEXT:    [[ADD107:%.*]] = add i32 [[SUB106]], 1
+// CHECK7-NEXT:    [[DIV108:%.*]] = udiv i32 [[ADD107]], 1
+// CHECK7-NEXT:    [[MUL109:%.*]] = mul i32 1, [[DIV108]]
+// CHECK7-NEXT:    [[CONV110:%.*]] = zext i32 [[MUL109]] to i64
+// CHECK7-NEXT:    [[MUL111:%.*]] = mul nsw i64 [[DIV104]], [[CONV110]]
+// CHECK7-NEXT:    [[SUB112:%.*]] = sub nsw i64 [[TMP51]], [[MUL111]]
+// CHECK7-NEXT:    [[MUL113:%.*]] = mul nsw i64 [[SUB112]], 1
+// CHECK7-NEXT:    [[ADD114:%.*]] = add nsw i64 [[CONV97]], [[MUL113]]
+// CHECK7-NEXT:    [[CONV115:%.*]] = trunc i64 [[ADD114]] to i32
+// CHECK7-NEXT:    store i32 [[CONV115]], ptr [[J47]], align 4
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE116:%.*]]
+// CHECK7:       omp.body.continue116:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC117:%.*]]
+// CHECK7:       omp.inner.for.inc117:
 // CHECK7-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
-// CHECK7-NEXT:    [[ADD119:%.*]] = add nsw i64 [[TMP57]], 1
-// CHECK7-NEXT:    store i64 [[ADD119]], ptr [[DOTOMP_IV45]], align 8
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND85]], !llvm.loop [[LOOP12:![0-9]+]]
-// CHECK7:       omp.inner.for.end120:
+// CHECK7-NEXT:    [[ADD118:%.*]] = add nsw i64 [[TMP57]], 1
+// CHECK7-NEXT:    store i64 [[ADD118]], ptr [[DOTOMP_IV45]], align 8
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND84]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK7:       omp.inner.for.end119:
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
 // CHECK7-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK7-NEXT:    [[SUB121:%.*]] = sub nsw i32 [[TMP58]], 0
-// CHECK7-NEXT:    [[DIV122:%.*]] = sdiv i32 [[SUB121]], 1
-// CHECK7-NEXT:    [[MUL123:%.*]] = mul nsw i32 [[DIV122]], 1
-// CHECK7-NEXT:    [[ADD124:%.*]] = add nsw i32 0, [[MUL123]]
-// CHECK7-NEXT:    store i32 [[ADD124]], ptr [[I20]], align 4
+// CHECK7-NEXT:    [[SUB120:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK7-NEXT:    [[DIV121:%.*]] = sdiv i32 [[SUB120]], 1
+// CHECK7-NEXT:    [[MUL122:%.*]] = mul nsw i32 [[DIV121]], 1
+// CHECK7-NEXT:    [[ADD123:%.*]] = add nsw i32 0, [[MUL122]]
+// CHECK7-NEXT:    store i32 [[ADD123]], ptr [[I20]], align 4
 // CHECK7-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK7-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK7-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK7-NEXT:    [[SUB125:%.*]] = sub i32 [[TMP60]], [[TMP61]]
-// CHECK7-NEXT:    [[SUB126:%.*]] = sub i32 [[SUB125]], 1
-// CHECK7-NEXT:    [[ADD127:%.*]] = add i32 [[SUB126]], 1
-// CHECK7-NEXT:    [[DIV128:%.*]] = udiv i32 [[ADD127]], 1
-// CHECK7-NEXT:    [[MUL129:%.*]] = mul i32 [[DIV128]], 1
-// CHECK7-NEXT:    [[ADD130:%.*]] = add i32 [[TMP59]], [[MUL129]]
-// CHECK7-NEXT:    store i32 [[ADD130]], ptr [[J47]], align 4
+// CHECK7-NEXT:    [[SUB124:%.*]] = sub i32 [[TMP60]], [[TMP61]]
+// CHECK7-NEXT:    [[SUB125:%.*]] = sub i32 [[SUB124]], 1
+// CHECK7-NEXT:    [[ADD126:%.*]] = add i32 [[SUB125]], 1
+// CHECK7-NEXT:    [[DIV127:%.*]] = udiv i32 [[ADD126]], 1
+// CHECK7-NEXT:    [[MUL128:%.*]] = mul i32 [[DIV127]], 1
+// CHECK7-NEXT:    [[ADD129:%.*]] = add i32 [[TMP59]], [[MUL128]]
+// CHECK7-NEXT:    store i32 [[ADD129]], ptr [[J47]], align 4
 // CHECK7-NEXT:    br label [[SIMD_IF_END]]
 // CHECK7:       simd.if.end:
 // CHECK7-NEXT:    [[TMP62:%.*]] = load i32, ptr [[RETVAL]], align 4
@@ -3558,8 +3558,8 @@ struct S {
 // CHECK7-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK7-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK7-NEXT:    store ptr [[TMP]], ptr [[_TMP2]], align 8
 // CHECK7-NEXT:    [[TMP1:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK7-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_3]], align 4
@@ -3723,8 +3723,8 @@ struct S {
 // CHECK8-NEXT:    store i32 10, ptr [[I9]], align 4
 // CHECK8-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK8-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK8-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK8-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_21]], align 1
+// CHECK8-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK8-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_21]], align 1
 // CHECK8-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK8-NEXT:    store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_22]], align 4
 // CHECK8-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -3776,143 +3776,143 @@ struct S {
 // CHECK8-NEXT:    [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
 // CHECK8-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP30]], i64 8) ]
 // CHECK8-NEXT:    [[TMP31:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_21]], align 1
-// CHECK8-NEXT:    [[TOBOOL48:%.*]] = trunc i8 [[TMP31]] to i1
-// CHECK8-NEXT:    br i1 [[TOBOOL48]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK8-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP31]] to i1
+// CHECK8-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK8:       omp_if.then:
-// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND49:%.*]]
-// CHECK8:       omp.inner.for.cond49:
+// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND48:%.*]]
+// CHECK8:       omp.inner.for.cond48:
 // CHECK8-NEXT:    [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]]
 // CHECK8-NEXT:    [[TMP33:%.*]] = load i64, ptr [[DOTOMP_UB41]], align 8, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    [[CMP50:%.*]] = icmp ule i64 [[TMP32]], [[TMP33]]
-// CHECK8-NEXT:    br i1 [[CMP50]], label [[OMP_INNER_FOR_BODY51:%.*]], label [[OMP_INNER_FOR_END84:%.*]]
-// CHECK8:       omp.inner.for.body51:
+// CHECK8-NEXT:    [[CMP49:%.*]] = icmp ule i64 [[TMP32]], [[TMP33]]
+// CHECK8-NEXT:    br i1 [[CMP49]], label [[OMP_INNER_FOR_BODY50:%.*]], label [[OMP_INNER_FOR_END83:%.*]]
+// CHECK8:       omp.inner.for.body50:
 // CHECK8-NEXT:    [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    [[SUB52:%.*]] = sub i32 [[TMP35]], [[TMP36]]
-// CHECK8-NEXT:    [[SUB53:%.*]] = sub i32 [[SUB52]], 1
-// CHECK8-NEXT:    [[ADD54:%.*]] = add i32 [[SUB53]], 1
-// CHECK8-NEXT:    [[DIV55:%.*]] = udiv i32 [[ADD54]], 1
-// CHECK8-NEXT:    [[MUL56:%.*]] = mul i32 1, [[DIV55]]
-// CHECK8-NEXT:    [[CONV57:%.*]] = zext i32 [[MUL56]] to i64
-// CHECK8-NEXT:    [[DIV58:%.*]] = sdiv i64 [[TMP34]], [[CONV57]]
-// CHECK8-NEXT:    [[MUL59:%.*]] = mul nsw i64 [[DIV58]], 1
-// CHECK8-NEXT:    [[ADD60:%.*]] = add nsw i64 0, [[MUL59]]
-// CHECK8-NEXT:    [[CONV61:%.*]] = trunc i64 [[ADD60]] to i32
-// CHECK8-NEXT:    store i32 [[CONV61]], ptr [[I46]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK8-NEXT:    [[SUB51:%.*]] = sub i32 [[TMP35]], [[TMP36]]
+// CHECK8-NEXT:    [[SUB52:%.*]] = sub i32 [[SUB51]], 1
+// CHECK8-NEXT:    [[ADD53:%.*]] = add i32 [[SUB52]], 1
+// CHECK8-NEXT:    [[DIV54:%.*]] = udiv i32 [[ADD53]], 1
+// CHECK8-NEXT:    [[MUL55:%.*]] = mul i32 1, [[DIV54]]
+// CHECK8-NEXT:    [[CONV56:%.*]] = zext i32 [[MUL55]] to i64
+// CHECK8-NEXT:    [[DIV57:%.*]] = sdiv i64 [[TMP34]], [[CONV56]]
+// CHECK8-NEXT:    [[MUL58:%.*]] = mul nsw i64 [[DIV57]], 1
+// CHECK8-NEXT:    [[ADD59:%.*]] = add nsw i64 0, [[MUL58]]
+// CHECK8-NEXT:    [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32
+// CHECK8-NEXT:    store i32 [[CONV60]], ptr [[I46]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    [[CONV62:%.*]] = sext i32 [[TMP37]] to i64
+// CHECK8-NEXT:    [[CONV61:%.*]] = sext i32 [[TMP37]] to i64
 // CHECK8-NEXT:    [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP39:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    [[SUB63:%.*]] = sub i32 [[TMP40]], [[TMP41]]
-// CHECK8-NEXT:    [[SUB64:%.*]] = sub i32 [[SUB63]], 1
-// CHECK8-NEXT:    [[ADD65:%.*]] = add i32 [[SUB64]], 1
-// CHECK8-NEXT:    [[DIV66:%.*]] = udiv i32 [[ADD65]], 1
-// CHECK8-NEXT:    [[MUL67:%.*]] = mul i32 1, [[DIV66]]
-// CHECK8-NEXT:    [[CONV68:%.*]] = zext i32 [[MUL67]] to i64
-// CHECK8-NEXT:    [[DIV69:%.*]] = sdiv i64 [[TMP39]], [[CONV68]]
+// CHECK8-NEXT:    [[SUB62:%.*]] = sub i32 [[TMP40]], [[TMP41]]
+// CHECK8-NEXT:    [[SUB63:%.*]] = sub i32 [[SUB62]], 1
+// CHECK8-NEXT:    [[ADD64:%.*]] = add i32 [[SUB63]], 1
+// CHECK8-NEXT:    [[DIV65:%.*]] = udiv i32 [[ADD64]], 1
+// CHECK8-NEXT:    [[MUL66:%.*]] = mul i32 1, [[DIV65]]
+// CHECK8-NEXT:    [[CONV67:%.*]] = zext i32 [[MUL66]] to i64
+// CHECK8-NEXT:    [[DIV68:%.*]] = sdiv i64 [[TMP39]], [[CONV67]]
 // CHECK8-NEXT:    [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4, !llvm.access.group [[ACC_GRP9]]
 // CHECK8-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    [[SUB70:%.*]] = sub i32 [[TMP42]], [[TMP43]]
-// CHECK8-NEXT:    [[SUB71:%.*]] = sub i32 [[SUB70]], 1
-// CHECK8-NEXT:    [[ADD72:%.*]] = add i32 [[SUB71]], 1
-// CHECK8-NEXT:    [[DIV73:%.*]] = udiv i32 [[ADD72]], 1
-// CHECK8-NEXT:    [[MUL74:%.*]] = mul i32 1, [[DIV73]]
-// CHECK8-NEXT:    [[CONV75:%.*]] = zext i32 [[MUL74]] to i64
-// CHECK8-NEXT:    [[MUL76:%.*]] = mul nsw i64 [[DIV69]], [[CONV75]]
-// CHECK8-NEXT:    [[SUB77:%.*]] = sub nsw i64 [[TMP38]], [[MUL76]]
-// CHECK8-NEXT:    [[MUL78:%.*]] = mul nsw i64 [[SUB77]], 1
-// CHECK8-NEXT:    [[ADD79:%.*]] = add nsw i64 [[CONV62]], [[MUL78]]
-// CHECK8-NEXT:    [[CONV80:%.*]] = trunc i64 [[ADD79]] to i32
-// CHECK8-NEXT:    store i32 [[CONV80]], ptr [[J47]], align 4, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    br label [[OMP_BODY_CONTINUE81:%.*]]
-// CHECK8:       omp.body.continue81:
-// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC82:%.*]]
-// CHECK8:       omp.inner.for.inc82:
+// CHECK8-NEXT:    [[SUB69:%.*]] = sub i32 [[TMP42]], [[TMP43]]
+// CHECK8-NEXT:    [[SUB70:%.*]] = sub i32 [[SUB69]], 1
+// CHECK8-NEXT:    [[ADD71:%.*]] = add i32 [[SUB70]], 1
+// CHECK8-NEXT:    [[DIV72:%.*]] = udiv i32 [[ADD71]], 1
+// CHECK8-NEXT:    [[MUL73:%.*]] = mul i32 1, [[DIV72]]
+// CHECK8-NEXT:    [[CONV74:%.*]] = zext i32 [[MUL73]] to i64
+// CHECK8-NEXT:    [[MUL75:%.*]] = mul nsw i64 [[DIV68]], [[CONV74]]
+// CHECK8-NEXT:    [[SUB76:%.*]] = sub nsw i64 [[TMP38]], [[MUL75]]
+// CHECK8-NEXT:    [[MUL77:%.*]] = mul nsw i64 [[SUB76]], 1
+// CHECK8-NEXT:    [[ADD78:%.*]] = add nsw i64 [[CONV61]], [[MUL77]]
+// CHECK8-NEXT:    [[CONV79:%.*]] = trunc i64 [[ADD78]] to i32
+// CHECK8-NEXT:    store i32 [[CONV79]], ptr [[J47]], align 4, !llvm.access.group [[ACC_GRP9]]
+// CHECK8-NEXT:    br label [[OMP_BODY_CONTINUE80:%.*]]
+// CHECK8:       omp.body.continue80:
+// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC81:%.*]]
+// CHECK8:       omp.inner.for.inc81:
 // CHECK8-NEXT:    [[TMP44:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    [[ADD83:%.*]] = add nsw i64 [[TMP44]], 1
-// CHECK8-NEXT:    store i64 [[ADD83]], ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
-// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND49]], !llvm.loop [[LOOP10:![0-9]+]]
-// CHECK8:       omp.inner.for.end84:
+// CHECK8-NEXT:    [[ADD82:%.*]] = add nsw i64 [[TMP44]], 1
+// CHECK8-NEXT:    store i64 [[ADD82]], ptr [[DOTOMP_IV45]], align 8, !llvm.access.group [[ACC_GRP9]]
+// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND48]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK8:       omp.inner.for.end83:
 // CHECK8-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK8:       omp_if.else:
-// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND85:%.*]]
-// CHECK8:       omp.inner.for.cond85:
+// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND84:%.*]]
+// CHECK8:       omp.inner.for.cond84:
 // CHECK8-NEXT:    [[TMP45:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK8-NEXT:    [[TMP46:%.*]] = load i64, ptr [[DOTOMP_UB41]], align 8
-// CHECK8-NEXT:    [[CMP86:%.*]] = icmp ule i64 [[TMP45]], [[TMP46]]
-// CHECK8-NEXT:    br i1 [[CMP86]], label [[OMP_INNER_FOR_BODY87:%.*]], label [[OMP_INNER_FOR_END120:%.*]]
-// CHECK8:       omp.inner.for.body87:
+// CHECK8-NEXT:    [[CMP85:%.*]] = icmp ule i64 [[TMP45]], [[TMP46]]
+// CHECK8-NEXT:    br i1 [[CMP85]], label [[OMP_INNER_FOR_BODY86:%.*]], label [[OMP_INNER_FOR_END119:%.*]]
+// CHECK8:       omp.inner.for.body86:
 // CHECK8-NEXT:    [[TMP47:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK8-NEXT:    [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK8-NEXT:    [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK8-NEXT:    [[SUB88:%.*]] = sub i32 [[TMP48]], [[TMP49]]
-// CHECK8-NEXT:    [[SUB89:%.*]] = sub i32 [[SUB88]], 1
-// CHECK8-NEXT:    [[ADD90:%.*]] = add i32 [[SUB89]], 1
-// CHECK8-NEXT:    [[DIV91:%.*]] = udiv i32 [[ADD90]], 1
-// CHECK8-NEXT:    [[MUL92:%.*]] = mul i32 1, [[DIV91]]
-// CHECK8-NEXT:    [[CONV93:%.*]] = zext i32 [[MUL92]] to i64
-// CHECK8-NEXT:    [[DIV94:%.*]] = sdiv i64 [[TMP47]], [[CONV93]]
-// CHECK8-NEXT:    [[MUL95:%.*]] = mul nsw i64 [[DIV94]], 1
-// CHECK8-NEXT:    [[ADD96:%.*]] = add nsw i64 0, [[MUL95]]
-// CHECK8-NEXT:    [[CONV97:%.*]] = trunc i64 [[ADD96]] to i32
-// CHECK8-NEXT:    store i32 [[CONV97]], ptr [[I46]], align 4
+// CHECK8-NEXT:    [[SUB87:%.*]] = sub i32 [[TMP48]], [[TMP49]]
+// CHECK8-NEXT:    [[SUB88:%.*]] = sub i32 [[SUB87]], 1
+// CHECK8-NEXT:    [[ADD89:%.*]] = add i32 [[SUB88]], 1
+// CHECK8-NEXT:    [[DIV90:%.*]] = udiv i32 [[ADD89]], 1
+// CHECK8-NEXT:    [[MUL91:%.*]] = mul i32 1, [[DIV90]]
+// CHECK8-NEXT:    [[CONV92:%.*]] = zext i32 [[MUL91]] to i64
+// CHECK8-NEXT:    [[DIV93:%.*]] = sdiv i64 [[TMP47]], [[CONV92]]
+// CHECK8-NEXT:    [[MUL94:%.*]] = mul nsw i64 [[DIV93]], 1
+// CHECK8-NEXT:    [[ADD95:%.*]] = add nsw i64 0, [[MUL94]]
+// CHECK8-NEXT:    [[CONV96:%.*]] = trunc i64 [[ADD95]] to i32
+// CHECK8-NEXT:    store i32 [[CONV96]], ptr [[I46]], align 4
 // CHECK8-NEXT:    [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK8-NEXT:    [[CONV98:%.*]] = sext i32 [[TMP50]] to i64
+// CHECK8-NEXT:    [[CONV97:%.*]] = sext i32 [[TMP50]] to i64
 // CHECK8-NEXT:    [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK8-NEXT:    [[TMP52:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
 // CHECK8-NEXT:    [[TMP53:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK8-NEXT:    [[TMP54:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK8-NEXT:    [[SUB99:%.*]] = sub i32 [[TMP53]], [[TMP54]]
-// CHECK8-NEXT:    [[SUB100:%.*]] = sub i32 [[SUB99]], 1
-// CHECK8-NEXT:    [[ADD101:%.*]] = add i32 [[SUB100]], 1
-// CHECK8-NEXT:    [[DIV102:%.*]] = udiv i32 [[ADD101]], 1
-// CHECK8-NEXT:    [[MUL103:%.*]] = mul i32 1, [[DIV102]]
-// CHECK8-NEXT:    [[CONV104:%.*]] = zext i32 [[MUL103]] to i64
-// CHECK8-NEXT:    [[DIV105:%.*]] = sdiv i64 [[TMP52]], [[CONV104]]
+// CHECK8-NEXT:    [[SUB98:%.*]] = sub i32 [[TMP53]], [[TMP54]]
+// CHECK8-NEXT:    [[SUB99:%.*]] = sub i32 [[SUB98]], 1
+// CHECK8-NEXT:    [[ADD100:%.*]] = add i32 [[SUB99]], 1
+// CHECK8-NEXT:    [[DIV101:%.*]] = udiv i32 [[ADD100]], 1
+// CHECK8-NEXT:    [[MUL102:%.*]] = mul i32 1, [[DIV101]]
+// CHECK8-NEXT:    [[CONV103:%.*]] = zext i32 [[MUL102]] to i64
+// CHECK8-NEXT:    [[DIV104:%.*]] = sdiv i64 [[TMP52]], [[CONV103]]
 // CHECK8-NEXT:    [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK8-NEXT:    [[TMP56:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK8-NEXT:    [[SUB106:%.*]] = sub i32 [[TMP55]], [[TMP56]]
-// CHECK8-NEXT:    [[SUB107:%.*]] = sub i32 [[SUB106]], 1
-// CHECK8-NEXT:    [[ADD108:%.*]] = add i32 [[SUB107]], 1
-// CHECK8-NEXT:    [[DIV109:%.*]] = udiv i32 [[ADD108]], 1
-// CHECK8-NEXT:    [[MUL110:%.*]] = mul i32 1, [[DIV109]]
-// CHECK8-NEXT:    [[CONV111:%.*]] = zext i32 [[MUL110]] to i64
-// CHECK8-NEXT:    [[MUL112:%.*]] = mul nsw i64 [[DIV105]], [[CONV111]]
-// CHECK8-NEXT:    [[SUB113:%.*]] = sub nsw i64 [[TMP51]], [[MUL112]]
-// CHECK8-NEXT:    [[MUL114:%.*]] = mul nsw i64 [[SUB113]], 1
-// CHECK8-NEXT:    [[ADD115:%.*]] = add nsw i64 [[CONV98]], [[MUL114]]
-// CHECK8-NEXT:    [[CONV116:%.*]] = trunc i64 [[ADD115]] to i32
-// CHECK8-NEXT:    store i32 [[CONV116]], ptr [[J47]], align 4
-// CHECK8-NEXT:    br label [[OMP_BODY_CONTINUE117:%.*]]
-// CHECK8:       omp.body.continue117:
-// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC118:%.*]]
-// CHECK8:       omp.inner.for.inc118:
+// CHECK8-NEXT:    [[SUB105:%.*]] = sub i32 [[TMP55]], [[TMP56]]
+// CHECK8-NEXT:    [[SUB106:%.*]] = sub i32 [[SUB105]], 1
+// CHECK8-NEXT:    [[ADD107:%.*]] = add i32 [[SUB106]], 1
+// CHECK8-NEXT:    [[DIV108:%.*]] = udiv i32 [[ADD107]], 1
+// CHECK8-NEXT:    [[MUL109:%.*]] = mul i32 1, [[DIV108]]
+// CHECK8-NEXT:    [[CONV110:%.*]] = zext i32 [[MUL109]] to i64
+// CHECK8-NEXT:    [[MUL111:%.*]] = mul nsw i64 [[DIV104]], [[CONV110]]
+// CHECK8-NEXT:    [[SUB112:%.*]] = sub nsw i64 [[TMP51]], [[MUL111]]
+// CHECK8-NEXT:    [[MUL113:%.*]] = mul nsw i64 [[SUB112]], 1
+// CHECK8-NEXT:    [[ADD114:%.*]] = add nsw i64 [[CONV97]], [[MUL113]]
+// CHECK8-NEXT:    [[CONV115:%.*]] = trunc i64 [[ADD114]] to i32
+// CHECK8-NEXT:    store i32 [[CONV115]], ptr [[J47]], align 4
+// CHECK8-NEXT:    br label [[OMP_BODY_CONTINUE116:%.*]]
+// CHECK8:       omp.body.continue116:
+// CHECK8-NEXT:    br label [[OMP_INNER_FOR_INC117:%.*]]
+// CHECK8:       omp.inner.for.inc117:
 // CHECK8-NEXT:    [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV45]], align 8
-// CHECK8-NEXT:    [[ADD119:%.*]] = add nsw i64 [[TMP57]], 1
-// CHECK8-NEXT:    store i64 [[ADD119]], ptr [[DOTOMP_IV45]], align 8
-// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND85]], !llvm.loop [[LOOP12:![0-9]+]]
-// CHECK8:       omp.inner.for.end120:
+// CHECK8-NEXT:    [[ADD118:%.*]] = add nsw i64 [[TMP57]], 1
+// CHECK8-NEXT:    store i64 [[ADD118]], ptr [[DOTOMP_IV45]], align 8
+// CHECK8-NEXT:    br label [[OMP_INNER_FOR_COND84]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK8:       omp.inner.for.end119:
 // CHECK8-NEXT:    br label [[OMP_IF_END]]
 // CHECK8:       omp_if.end:
 // CHECK8-NEXT:    [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK8-NEXT:    [[SUB121:%.*]] = sub nsw i32 [[TMP58]], 0
-// CHECK8-NEXT:    [[DIV122:%.*]] = sdiv i32 [[SUB121]], 1
-// CHECK8-NEXT:    [[MUL123:%.*]] = mul nsw i32 [[DIV122]], 1
-// CHECK8-NEXT:    [[ADD124:%.*]] = add nsw i32 0, [[MUL123]]
-// CHECK8-NEXT:    store i32 [[ADD124]], ptr [[I20]], align 4
+// CHECK8-NEXT:    [[SUB120:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK8-NEXT:    [[DIV121:%.*]] = sdiv i32 [[SUB120]], 1
+// CHECK8-NEXT:    [[MUL122:%.*]] = mul nsw i32 [[DIV121]], 1
+// CHECK8-NEXT:    [[ADD123:%.*]] = add nsw i32 0, [[MUL122]]
+// CHECK8-NEXT:    store i32 [[ADD123]], ptr [[I20]], align 4
 // CHECK8-NEXT:    [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
 // CHECK8-NEXT:    [[TMP60:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_27]], align 4
 // CHECK8-NEXT:    [[TMP61:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK8-NEXT:    [[SUB125:%.*]] = sub i32 [[TMP60]], [[TMP61]]
-// CHECK8-NEXT:    [[SUB126:%.*]] = sub i32 [[SUB125]], 1
-// CHECK8-NEXT:    [[ADD127:%.*]] = add i32 [[SUB126]], 1
-// CHECK8-NEXT:    [[DIV128:%.*]] = udiv i32 [[ADD127]], 1
-// CHECK8-NEXT:    [[MUL129:%.*]] = mul i32 [[DIV128]], 1
-// CHECK8-NEXT:    [[ADD130:%.*]] = add i32 [[TMP59]], [[MUL129]]
-// CHECK8-NEXT:    store i32 [[ADD130]], ptr [[J47]], align 4
+// CHECK8-NEXT:    [[SUB124:%.*]] = sub i32 [[TMP60]], [[TMP61]]
+// CHECK8-NEXT:    [[SUB125:%.*]] = sub i32 [[SUB124]], 1
+// CHECK8-NEXT:    [[ADD126:%.*]] = add i32 [[SUB125]], 1
+// CHECK8-NEXT:    [[DIV127:%.*]] = udiv i32 [[ADD126]], 1
+// CHECK8-NEXT:    [[MUL128:%.*]] = mul i32 [[DIV127]], 1
+// CHECK8-NEXT:    [[ADD129:%.*]] = add i32 [[TMP59]], [[MUL128]]
+// CHECK8-NEXT:    store i32 [[ADD129]], ptr [[J47]], align 4
 // CHECK8-NEXT:    br label [[SIMD_IF_END]]
 // CHECK8:       simd.if.end:
 // CHECK8-NEXT:    [[TMP62:%.*]] = load i32, ptr [[RETVAL]], align 4
@@ -3962,8 +3962,8 @@ struct S {
 // CHECK8-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
 // CHECK8-NEXT:    [[TMP0:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK8-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK8-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK8-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK8-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK8-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK8-NEXT:    store ptr [[TMP]], ptr [[_TMP2]], align 8
 // CHECK8-NEXT:    [[TMP1:%.*]] = load i32, ptr [[C_ADDR]], align 4
 // CHECK8-NEXT:    store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_3]], align 4
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
index 9be3ca8fd7587..a9f28981d6393 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
@@ -472,8 +472,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -511,16 +511,16 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83() #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -531,42 +531,42 @@ int main() {
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP27]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP28]], align 4
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[TMP30]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS5]])
 // CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
-// CHECK1:       omp_offload.failed7:
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK1:       omp_offload.failed6:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
-// CHECK1:       omp_offload.cont8:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
+// CHECK1:       omp_offload.cont7:
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK1:       omp_if.else:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]]
@@ -865,9 +865,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
@@ -922,8 +922,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -1031,8 +1031,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -1070,12 +1070,12 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64() #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store i64 [[TMP17]], ptr [[TMP18]], align 8
@@ -1086,42 +1086,42 @@ int main() {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
-// CHECK1-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP23]] to i1
+// CHECK1-NEXT:    [[TMP24:%.*]] = select i1 [[LOADEDV2]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP26]], align 4
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP27]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[TMP28]], align 8
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP30]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP34]], align 8
-// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP25]], ptr [[TMP37]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP38]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK1-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS4]])
 // CHECK1-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK1-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
-// CHECK1:       omp_offload.failed6:
+// CHECK1-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK1:       omp_offload.failed5:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
-// CHECK1:       omp_offload.cont7:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
+// CHECK1:       omp_offload.cont6:
 // CHECK1-NEXT:    ret i32 0
 //
 //
@@ -1413,9 +1413,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
@@ -1470,8 +1470,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
index 71f4506fb6348..48764b27ed6fa 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -532,8 +532,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -571,16 +571,16 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85() #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -591,42 +591,42 @@ int main() {
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP27]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP28]], align 4
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[TMP30]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS5]])
 // CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
-// CHECK1:       omp_offload.failed7:
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK1:       omp_offload.failed6:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
-// CHECK1:       omp_offload.cont8:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
+// CHECK1:       omp_offload.cont7:
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK1:       omp_if.else:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
@@ -953,9 +953,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
@@ -1010,8 +1010,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]]
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP36]]
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]]
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -1133,8 +1133,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -1172,12 +1172,12 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66() #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store i64 [[TMP17]], ptr [[TMP18]], align 8
@@ -1188,42 +1188,42 @@ int main() {
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
-// CHECK1-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP23]] to i1
+// CHECK1-NEXT:    [[TMP24:%.*]] = select i1 [[LOADEDV2]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP26]], align 4
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP27]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP21]], ptr [[TMP28]], align 8
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP30]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP34]], align 8
-// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP25]], ptr [[TMP37]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP38]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK1-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS4]])
 // CHECK1-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK1-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
-// CHECK1:       omp_offload.failed6:
+// CHECK1-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK1:       omp_offload.failed5:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
-// CHECK1:       omp_offload.cont7:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
+// CHECK1:       omp_offload.cont6:
 // CHECK1-NEXT:    ret i32 0
 //
 //
@@ -1543,9 +1543,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
@@ -1600,8 +1600,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]]
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]]
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]]
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -2137,8 +2137,8 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK3-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -2176,16 +2176,16 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85() #[[ATTR2]]
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, ptr @Arg, align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK3-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK3-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -2196,42 +2196,42 @@ int main() {
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK3-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK3-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK3-NEXT:    [[TMP25:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
 // CHECK3-NEXT:    store i32 1, ptr [[TMP28]], align 4
-// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
 // CHECK3-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
 // CHECK3-NEXT:    store ptr [[TMP23]], ptr [[TMP30]], align 8
-// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
 // CHECK3-NEXT:    store ptr @.offload_sizes.2, ptr [[TMP31]], align 8
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
 // CHECK3-NEXT:    store ptr @.offload_maptypes.3, ptr [[TMP32]], align 8
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
 // CHECK3-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
 // CHECK3-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
 // CHECK3-NEXT:    store i64 100, ptr [[TMP35]], align 8
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
 // CHECK3-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
 // CHECK3-NEXT:    store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
 // CHECK3-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS5]])
 // CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
-// CHECK3:       omp_offload.failed7:
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK3:       omp_offload.failed6:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
-// CHECK3:       omp_offload.cont8:
+// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
+// CHECK3:       omp_offload.cont7:
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK3:       omp_if.else:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
@@ -2558,9 +2558,9 @@ int main() {
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP1]])
 // CHECK3-NEXT:    ret void
@@ -2607,8 +2607,8 @@ int main() {
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK3:       omp.inner.for.cond:
@@ -2622,13 +2622,13 @@ int main() {
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]]
 // CHECK3-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]]
-// CHECK3-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]]
+// CHECK3-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]]
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP34]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]]
-// CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then4:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP34]]
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -2662,13 +2662,13 @@ int main() {
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK3-NEXT:    [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
+// CHECK3-NEXT:    [[LOADEDV9:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK3-NEXT:    [[STOREDV11:%.*]] = zext i1 [[LOADEDV9]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
+// CHECK3-NEXT:    [[LOADEDV12:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
 // CHECK3:       omp_if.then13:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined.1, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]])
 // CHECK3-NEXT:    br label [[OMP_IF_END16:%.*]]
@@ -2734,8 +2734,8 @@ int main() {
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -2861,8 +2861,8 @@ int main() {
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -2968,8 +2968,8 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK3-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -3007,12 +3007,12 @@ int main() {
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66() #[[ATTR2]]
 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK3-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK3-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    store i64 [[TMP17]], ptr [[TMP18]], align 8
@@ -3023,42 +3023,42 @@ int main() {
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
-// CHECK3-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
+// CHECK3-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP23]] to i1
+// CHECK3-NEXT:    [[TMP24:%.*]] = select i1 [[LOADEDV2]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
 // CHECK3-NEXT:    store i32 1, ptr [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
 // CHECK3-NEXT:    store ptr [[TMP21]], ptr [[TMP28]], align 8
-// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
 // CHECK3-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
 // CHECK3-NEXT:    store ptr @.offload_sizes.4, ptr [[TMP30]], align 8
-// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
 // CHECK3-NEXT:    store ptr @.offload_maptypes.5, ptr [[TMP31]], align 8
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
 // CHECK3-NEXT:    store ptr null, ptr [[TMP32]], align 8
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
 // CHECK3-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
 // CHECK3-NEXT:    store i64 100, ptr [[TMP34]], align 8
-// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
 // CHECK3-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
 // CHECK3-NEXT:    store [3 x i32] [[TMP25]], ptr [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
 // CHECK3-NEXT:    store i32 0, ptr [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK3-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS4]])
 // CHECK3-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK3-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
-// CHECK3:       omp_offload.failed6:
+// CHECK3-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK3:       omp_offload.failed5:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
-// CHECK3:       omp_offload.cont7:
+// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
+// CHECK3:       omp_offload.cont6:
 // CHECK3-NEXT:    ret i32 0
 //
 //
@@ -3378,9 +3378,9 @@ int main() {
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined, i64 [[TMP1]])
 // CHECK3-NEXT:    ret void
@@ -3435,8 +3435,8 @@ int main() {
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]]
 // CHECK3-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]]
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]]
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -3690,8 +3690,8 @@ int main() {
 // CHECK5-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK5-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK5-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK5-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK5-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK5-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK5-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -3798,8 +3798,8 @@ int main() {
 // CHECK5-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK5-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK5-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK5-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK5-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK5-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK5-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -3972,60 +3972,60 @@ int main() {
 // CHECK7-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK7-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK7-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK7-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV19]], align 4
 // CHECK7-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK7-NEXT:    [[TOBOOL21:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK7-NEXT:    br i1 [[TOBOOL21]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK7-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK7-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK7:       omp_if.then:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND22:%.*]]
-// CHECK7:       omp.inner.for.cond22:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND21:%.*]]
+// CHECK7:       omp.inner.for.cond21:
 // CHECK7-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
 // CHECK7-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK7-NEXT:    [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK7-NEXT:    br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
-// CHECK7:       omp.inner.for.body24:
+// CHECK7-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK7-NEXT:    br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
+// CHECK7:       omp.inner.for.body23:
 // CHECK7-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK7-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK7-NEXT:    [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK7-NEXT:    store i32 [[ADD26]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK7-NEXT:    [[MUL24:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK7-NEXT:    [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
+// CHECK7-NEXT:    store i32 [[ADD25]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP15]]
 // CHECK7-NEXT:    call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP15]]
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE27:%.*]]
-// CHECK7:       omp.body.continue27:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC28:%.*]]
-// CHECK7:       omp.inner.for.inc28:
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE26:%.*]]
+// CHECK7:       omp.body.continue26:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC27:%.*]]
+// CHECK7:       omp.inner.for.inc27:
 // CHECK7-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK7-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK7-NEXT:    store i32 [[ADD29]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]]
-// CHECK7:       omp.inner.for.end30:
+// CHECK7-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK7-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK7:       omp.inner.for.end29:
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK7:       omp_if.else:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND31:%.*]]
-// CHECK7:       omp.inner.for.cond31:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
+// CHECK7:       omp.inner.for.cond30:
 // CHECK7-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
 // CHECK7-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4
-// CHECK7-NEXT:    [[CMP32:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
-// CHECK7-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END39:%.*]]
-// CHECK7:       omp.inner.for.body33:
+// CHECK7-NEXT:    [[CMP31:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// CHECK7-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END38:%.*]]
+// CHECK7:       omp.inner.for.body32:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[TMP19]], 1
-// CHECK7-NEXT:    [[ADD35:%.*]] = add nsw i32 0, [[MUL34]]
-// CHECK7-NEXT:    store i32 [[ADD35]], ptr [[I20]], align 4
+// CHECK7-NEXT:    [[MUL33:%.*]] = mul nsw i32 [[TMP19]], 1
+// CHECK7-NEXT:    [[ADD34:%.*]] = add nsw i32 0, [[MUL33]]
+// CHECK7-NEXT:    store i32 [[ADD34]], ptr [[I20]], align 4
 // CHECK7-NEXT:    call void @_Z3fn6v()
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE36:%.*]]
-// CHECK7:       omp.body.continue36:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC37:%.*]]
-// CHECK7:       omp.inner.for.inc37:
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE35:%.*]]
+// CHECK7:       omp.body.continue35:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC36:%.*]]
+// CHECK7:       omp.inner.for.inc36:
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK7-NEXT:    store i32 [[ADD38]], ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND31]], !llvm.loop [[LOOP18:![0-9]+]]
-// CHECK7:       omp.inner.for.end39:
+// CHECK7-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK7-NEXT:    store i32 [[ADD37]], ptr [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK7:       omp.inner.for.end38:
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
 // CHECK7-NEXT:    store i32 100, ptr [[I20]], align 4
@@ -4109,8 +4109,8 @@ int main() {
 // CHECK7-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK7-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK7-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK7-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -4565,8 +4565,8 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK9-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -4604,16 +4604,16 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85() #[[ATTR2]]
 // CHECK9-NEXT:    [[TMP15:%.*]] = load i32, ptr @Arg, align 4
 // CHECK9-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK9-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK9-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK9-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK9-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK9-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK9-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK9:       omp_if.then:
 // CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -4624,42 +4624,42 @@ int main() {
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK9-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK9-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK9-NEXT:    [[TMP25:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP27]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
 // CHECK9-NEXT:    store i32 1, ptr [[TMP28]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
 // CHECK9-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
 // CHECK9-NEXT:    store ptr [[TMP23]], ptr [[TMP30]], align 8
-// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
 // CHECK9-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
 // CHECK9-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8
-// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
 // CHECK9-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
 // CHECK9-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
 // CHECK9-NEXT:    store i64 100, ptr [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
 // CHECK9-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
 // CHECK9-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
 // CHECK9-NEXT:    store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
 // CHECK9-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK9-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK9-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS5]])
 // CHECK9-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK9-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
-// CHECK9:       omp_offload.failed7:
+// CHECK9-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK9:       omp_offload.failed6:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
-// CHECK9:       omp_offload.cont8:
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
+// CHECK9:       omp_offload.cont7:
 // CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK9:       omp_if.else:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
@@ -4986,9 +4986,9 @@ int main() {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK9-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP1]])
 // CHECK9-NEXT:    ret void
@@ -5043,8 +5043,8 @@ int main() {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]]
 // CHECK9-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP36]]
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK9-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK9:       omp_if.then:
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]]
 // CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -5166,8 +5166,8 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK9-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -5205,12 +5205,12 @@ int main() {
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66() #[[ATTR2]]
 // CHECK9-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK9-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK9-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK9-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK9-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK9-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    store i64 [[TMP17]], ptr [[TMP18]], align 8
@@ -5221,42 +5221,42 @@ int main() {
 // CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
-// CHECK9-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
+// CHECK9-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP23]] to i1
+// CHECK9-NEXT:    [[TMP24:%.*]] = select i1 [[LOADEDV2]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP26]], align 4
-// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
 // CHECK9-NEXT:    store i32 1, ptr [[TMP27]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
 // CHECK9-NEXT:    store ptr [[TMP21]], ptr [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
 // CHECK9-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
 // CHECK9-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP30]], align 8
-// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
 // CHECK9-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
 // CHECK9-NEXT:    store ptr null, ptr [[TMP32]], align 8
-// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
 // CHECK9-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
 // CHECK9-NEXT:    store i64 100, ptr [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
 // CHECK9-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
 // CHECK9-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
 // CHECK9-NEXT:    store [3 x i32] [[TMP25]], ptr [[TMP37]], align 4
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
 // CHECK9-NEXT:    store i32 0, ptr [[TMP38]], align 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS4]])
 // CHECK9-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
-// CHECK9:       omp_offload.failed6:
+// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK9:       omp_offload.failed5:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
-// CHECK9:       omp_offload.cont7:
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
+// CHECK9:       omp_offload.cont6:
 // CHECK9-NEXT:    ret i32 0
 //
 //
@@ -5576,9 +5576,9 @@ int main() {
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK9-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK9-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined, i64 [[TMP1]])
 // CHECK9-NEXT:    ret void
@@ -5633,8 +5633,8 @@ int main() {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]]
 // CHECK9-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]]
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK9-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK9:       omp_if.then:
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]]
 // CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -6170,8 +6170,8 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK11-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -6209,16 +6209,16 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85() #[[ATTR2]]
 // CHECK11-NEXT:    [[TMP15:%.*]] = load i32, ptr @Arg, align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK11-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK11-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK11-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK11-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -6229,42 +6229,42 @@ int main() {
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK11-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK11-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK11-NEXT:    [[TMP25:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
 // CHECK11-NEXT:    store i32 1, ptr [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
 // CHECK11-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
 // CHECK11-NEXT:    store ptr [[TMP23]], ptr [[TMP30]], align 8
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
 // CHECK11-NEXT:    store ptr @.offload_sizes.2, ptr [[TMP31]], align 8
-// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
 // CHECK11-NEXT:    store ptr @.offload_maptypes.3, ptr [[TMP32]], align 8
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
 // CHECK11-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
 // CHECK11-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
 // CHECK11-NEXT:    store i64 100, ptr [[TMP35]], align 8
-// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
 // CHECK11-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
 // CHECK11-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
 // CHECK11-NEXT:    store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
 // CHECK11-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK11-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS5]])
 // CHECK11-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK11-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
-// CHECK11:       omp_offload.failed7:
+// CHECK11-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK11:       omp_offload.failed6:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
-// CHECK11:       omp_offload.cont8:
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
+// CHECK11:       omp_offload.cont7:
 // CHECK11-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK11:       omp_if.else:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP17]]) #[[ATTR2]]
@@ -6591,9 +6591,9 @@ int main() {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP1]])
 // CHECK11-NEXT:    ret void
@@ -6640,8 +6640,8 @@ int main() {
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK11:       omp.inner.for.cond:
@@ -6655,13 +6655,13 @@ int main() {
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]]
 // CHECK11-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]]
-// CHECK11-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]]
+// CHECK11-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]]
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP34]]
 // CHECK11-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]]
-// CHECK11-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then4:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP34]]
 // CHECK11-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -6695,13 +6695,13 @@ int main() {
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
 // CHECK11-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK11-NEXT:    [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
+// CHECK11-NEXT:    [[LOADEDV9:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK11-NEXT:    [[STOREDV11:%.*]] = zext i1 [[LOADEDV9]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
 // CHECK11-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8
 // CHECK11-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
+// CHECK11-NEXT:    [[LOADEDV12:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
 // CHECK11:       omp_if.then13:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined.1, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]])
 // CHECK11-NEXT:    br label [[OMP_IF_END16:%.*]]
@@ -6767,8 +6767,8 @@ int main() {
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -6894,8 +6894,8 @@ int main() {
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -7001,8 +7001,8 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK11-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -7040,12 +7040,12 @@ int main() {
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66() #[[ATTR2]]
 // CHECK11-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK11-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK11-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK11-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    store i64 [[TMP17]], ptr [[TMP18]], align 8
@@ -7056,42 +7056,42 @@ int main() {
 // CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1
-// CHECK11-NEXT:    [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1
+// CHECK11-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP23]] to i1
+// CHECK11-NEXT:    [[TMP24:%.*]] = select i1 [[LOADEDV2]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
 // CHECK11-NEXT:    store i32 1, ptr [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
 // CHECK11-NEXT:    store ptr [[TMP21]], ptr [[TMP28]], align 8
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
 // CHECK11-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
 // CHECK11-NEXT:    store ptr @.offload_sizes.4, ptr [[TMP30]], align 8
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
 // CHECK11-NEXT:    store ptr @.offload_maptypes.5, ptr [[TMP31]], align 8
-// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
 // CHECK11-NEXT:    store ptr null, ptr [[TMP32]], align 8
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
 // CHECK11-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
 // CHECK11-NEXT:    store i64 100, ptr [[TMP34]], align 8
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
 // CHECK11-NEXT:    store i64 0, ptr [[TMP35]], align 8
-// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
 // CHECK11-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
 // CHECK11-NEXT:    store [3 x i32] [[TMP25]], ptr [[TMP37]], align 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
 // CHECK11-NEXT:    store i32 0, ptr [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.region_id, ptr [[KERNEL_ARGS4]])
 // CHECK11-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
-// CHECK11:       omp_offload.failed6:
+// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK11:       omp_offload.failed5:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
-// CHECK11:       omp_offload.cont7:
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
+// CHECK11:       omp_offload.cont6:
 // CHECK11-NEXT:    ret i32 0
 //
 //
@@ -7411,9 +7411,9 @@ int main() {
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK11-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined, i64 [[TMP1]])
 // CHECK11-NEXT:    ret void
@@ -7468,8 +7468,8 @@ int main() {
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]]
 // CHECK11-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]]
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l70.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]]
 // CHECK11-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -7723,8 +7723,8 @@ int main() {
 // CHECK13-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK13-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK13-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK13-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK13-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK13-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK13-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK13-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -7831,8 +7831,8 @@ int main() {
 // CHECK13-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK13-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK13-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK13-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK13-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK13-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK13-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK13-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -8005,60 +8005,60 @@ int main() {
 // CHECK15-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK15-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK15-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK15-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK15-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK15-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK15-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV19]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK15-NEXT:    [[TOBOOL21:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK15-NEXT:    br i1 [[TOBOOL21]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK15-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK15-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK15:       omp_if.then:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND22:%.*]]
-// CHECK15:       omp.inner.for.cond22:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND21:%.*]]
+// CHECK15:       omp.inner.for.cond21:
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
 // CHECK15-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK15-NEXT:    [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK15-NEXT:    br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
-// CHECK15:       omp.inner.for.body24:
+// CHECK15-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK15-NEXT:    br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
+// CHECK15:       omp.inner.for.body23:
 // CHECK15-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK15-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK15-NEXT:    [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK15-NEXT:    store i32 [[ADD26]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK15-NEXT:    [[MUL24:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK15-NEXT:    [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
+// CHECK15-NEXT:    store i32 [[ADD25]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP15]]
 // CHECK15-NEXT:    call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP15]]
-// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE27:%.*]]
-// CHECK15:       omp.body.continue27:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC28:%.*]]
-// CHECK15:       omp.inner.for.inc28:
+// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE26:%.*]]
+// CHECK15:       omp.body.continue26:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC27:%.*]]
+// CHECK15:       omp.inner.for.inc27:
 // CHECK15-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK15-NEXT:    store i32 [[ADD29]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]]
-// CHECK15:       omp.inner.for.end30:
+// CHECK15-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK15-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]]
+// CHECK15:       omp.inner.for.end29:
 // CHECK15-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK15:       omp_if.else:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND31:%.*]]
-// CHECK15:       omp.inner.for.cond31:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
+// CHECK15:       omp.inner.for.cond30:
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4
-// CHECK15-NEXT:    [[CMP32:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
-// CHECK15-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END39:%.*]]
-// CHECK15:       omp.inner.for.body33:
+// CHECK15-NEXT:    [[CMP31:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// CHECK15-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END38:%.*]]
+// CHECK15:       omp.inner.for.body32:
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[TMP19]], 1
-// CHECK15-NEXT:    [[ADD35:%.*]] = add nsw i32 0, [[MUL34]]
-// CHECK15-NEXT:    store i32 [[ADD35]], ptr [[I20]], align 4
+// CHECK15-NEXT:    [[MUL33:%.*]] = mul nsw i32 [[TMP19]], 1
+// CHECK15-NEXT:    [[ADD34:%.*]] = add nsw i32 0, [[MUL33]]
+// CHECK15-NEXT:    store i32 [[ADD34]], ptr [[I20]], align 4
 // CHECK15-NEXT:    call void @_Z3fn6v()
-// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE36:%.*]]
-// CHECK15:       omp.body.continue36:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC37:%.*]]
-// CHECK15:       omp.inner.for.inc37:
+// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE35:%.*]]
+// CHECK15:       omp.body.continue35:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC36:%.*]]
+// CHECK15:       omp.inner.for.inc36:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK15-NEXT:    store i32 [[ADD38]], ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND31]], !llvm.loop [[LOOP18:![0-9]+]]
-// CHECK15:       omp.inner.for.end39:
+// CHECK15-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK15-NEXT:    store i32 [[ADD37]], ptr [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP18:![0-9]+]]
+// CHECK15:       omp.inner.for.end38:
 // CHECK15-NEXT:    br label [[OMP_IF_END]]
 // CHECK15:       omp_if.end:
 // CHECK15-NEXT:    store i32 100, ptr [[I20]], align 4
@@ -8142,8 +8142,8 @@ int main() {
 // CHECK15-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK15-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK15-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK15-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK15-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK15-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK15-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
index e1a6aad65b796..ccc877723ef9d 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp
@@ -363,8 +363,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -402,16 +402,16 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83() #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -422,42 +422,42 @@ int main() {
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK1-NEXT:    [[TMP25:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP27]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP28]], align 4
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[TMP30]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP34]], align 8
-// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP36]], align 8
-// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP39]], align 4
-// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS5]])
 // CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
-// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
-// CHECK1:       omp_offload.failed7:
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK1:       omp_offload.failed6:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
-// CHECK1:       omp_offload.cont8:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
+// CHECK1:       omp_offload.cont7:
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK1:       omp_if.else:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]]
@@ -611,9 +611,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
@@ -693,8 +693,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -732,16 +732,16 @@ int main() {
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64() #[[ATTR2]]
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP16]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP18]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    store i64 [[TMP17]], ptr [[TMP19]], align 8
@@ -751,39 +751,39 @@ int main() {
 // CHECK1-NEXT:    store ptr null, ptr [[TMP21]], align 8
 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP24]], align 4
-// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP25]], align 4
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP22]], ptr [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP23]], ptr [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP28]], align 8
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP30]], align 8
-// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP35]], align 4
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP36]], align 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK1-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS4]])
 // CHECK1-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK1-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
-// CHECK1:       omp_offload.failed6:
+// CHECK1-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CHECK1:       omp_offload.failed5:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
-// CHECK1:       omp_offload.cont7:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT6]]
+// CHECK1:       omp_offload.cont6:
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK1:       omp_if.else:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]]
@@ -935,9 +935,9 @@ int main() {
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined, i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
index aed27c47fa1d3..2a8621fac25dd 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp
@@ -482,8 +482,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -564,45 +564,45 @@ int main() {
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP45]], align 8
-// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP46]], align 8
-// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK1-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK1:       omp_offload.failed8:
+// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1:       omp_offload.failed7:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK1:       omp_offload.cont9:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1:       omp_offload.cont8:
 // CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP56]])
 // CHECK1-NEXT:    ret i32 [[CALL]]
@@ -898,12 +898,12 @@ int main() {
 // CHECK1-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97.omp_outlined, i64 [[TMP2]])
 // CHECK1-NEXT:    ret void
@@ -958,8 +958,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -1069,8 +1069,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -1151,45 +1151,45 @@ int main() {
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP45]], align 8
-// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP46]], align 8
-// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK1-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK1:       omp_offload.failed8:
+// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1:       omp_offload.failed7:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK1:       omp_offload.cont9:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1:       omp_offload.cont8:
 // CHECK1-NEXT:    ret i32 0
 //
 //
@@ -1483,12 +1483,12 @@ int main() {
 // CHECK1-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72.omp_outlined, i64 [[TMP2]])
 // CHECK1-NEXT:    ret void
@@ -1543,8 +1543,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]])
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
index 58c1f4155abfb..c796b5e5948c8 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp
@@ -507,8 +507,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -589,45 +589,45 @@ int main() {
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP45]], align 8
-// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP46]], align 8
-// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK1-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK1:       omp_offload.failed8:
+// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1:       omp_offload.failed7:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK1:       omp_offload.cont9:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1:       omp_offload.cont8:
 // CHECK1-NEXT:    [[TMP56:%.*]] = load i32, ptr @Arg, align 4
 // CHECK1-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP56]])
 // CHECK1-NEXT:    ret i32 [[CALL]]
@@ -951,12 +951,12 @@ int main() {
 // CHECK1-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP2]])
 // CHECK1-NEXT:    ret void
@@ -1011,8 +1011,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]]
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP38]]
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP38]]
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -1136,8 +1136,8 @@ int main() {
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK1-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK1-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -1218,45 +1218,45 @@ int main() {
 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP37:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK1-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK1-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK1-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK1-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK1-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK1-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP45]], align 8
-// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK1-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP46]], align 8
-// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK1-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK1-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK1-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK1-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK1-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK1-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK1-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK1-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK1:       omp_offload.failed8:
+// CHECK1-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1:       omp_offload.failed7:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK1:       omp_offload.cont9:
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1:       omp_offload.cont8:
 // CHECK1-NEXT:    ret i32 0
 //
 //
@@ -1578,12 +1578,12 @@ int main() {
 // CHECK1-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK1-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK1-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK1-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK1-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK1-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK1-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined, i64 [[TMP2]])
 // CHECK1-NEXT:    ret void
@@ -1638,8 +1638,8 @@ int main() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]]
 // CHECK1-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP56]]
-// CHECK1-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK1-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK1-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK1:       omp_if.then:
 // CHECK1-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP56]]
 // CHECK1-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -2147,8 +2147,8 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -2229,45 +2229,45 @@ int main() {
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP37:%.*]] = load i32, ptr @Arg, align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK3-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK3-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK3-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK3-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK3-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK3-NEXT:    store ptr @.offload_sizes, ptr [[TMP45]], align 8
-// CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK3-NEXT:    store ptr @.offload_maptypes, ptr [[TMP46]], align 8
-// CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK3-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK3-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK3-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK3-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK3-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK3-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK3-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK3-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK3-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK3-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK3:       omp_offload.failed8:
+// CHECK3-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK3:       omp_offload.failed7:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK3:       omp_offload.cont9:
+// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK3:       omp_offload.cont8:
 // CHECK3-NEXT:    [[TMP56:%.*]] = load i32, ptr @Arg, align 4
 // CHECK3-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP56]])
 // CHECK3-NEXT:    ret i32 [[CALL]]
@@ -2591,12 +2591,12 @@ int main() {
 // CHECK3-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK3-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK3-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP2]])
 // CHECK3-NEXT:    ret void
@@ -2643,8 +2643,8 @@ int main() {
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK3:       omp.inner.for.cond:
@@ -2658,13 +2658,13 @@ int main() {
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 // CHECK3-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK3-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
+// CHECK3-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK3-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then4:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP35]]
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -2698,13 +2698,13 @@ int main() {
 // CHECK3-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK3-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
 // CHECK3-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK3-NEXT:    [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
+// CHECK3-NEXT:    [[LOADEDV9:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK3-NEXT:    [[STOREDV11:%.*]] = zext i1 [[LOADEDV9]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
+// CHECK3-NEXT:    [[LOADEDV12:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
 // CHECK3:       omp_if.then13:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined.1, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]])
 // CHECK3-NEXT:    br label [[OMP_IF_END16:%.*]]
@@ -2770,8 +2770,8 @@ int main() {
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -2897,8 +2897,8 @@ int main() {
 // CHECK3-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -3006,8 +3006,8 @@ int main() {
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK3-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK3-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK3-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -3088,45 +3088,45 @@ int main() {
 // CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP37:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK3-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK3-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK3-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK3-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK3-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK3-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK3-NEXT:    store ptr @.offload_sizes.2, ptr [[TMP45]], align 8
-// CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK3-NEXT:    store ptr @.offload_maptypes.3, ptr [[TMP46]], align 8
-// CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK3-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK3-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK3-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK3-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK3-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK3-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK3-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK3-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK3-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK3-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK3:       omp_offload.failed8:
+// CHECK3-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK3:       omp_offload.failed7:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK3:       omp_offload.cont9:
+// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK3:       omp_offload.cont8:
 // CHECK3-NEXT:    ret i32 0
 //
 //
@@ -3448,12 +3448,12 @@ int main() {
 // CHECK3-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK3-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK3-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK3-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK3-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK3-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined, i64 [[TMP2]])
 // CHECK3-NEXT:    ret void
@@ -3508,8 +3508,8 @@ int main() {
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]]
 // CHECK3-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK3-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP55]]
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK3-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK3-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK3:       omp_if.then:
 // CHECK3-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP55]]
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -3762,8 +3762,8 @@ int main() {
 // CHECK5-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK5-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK5-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK5-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK5-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK5-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK5-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -3870,8 +3870,8 @@ int main() {
 // CHECK5-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK5-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK5-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK5-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK5-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK5-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK5-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK5-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK5-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -4043,60 +4043,60 @@ int main() {
 // CHECK7-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK7-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK7-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK7-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV19]], align 4
 // CHECK7-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK7-NEXT:    [[TOBOOL21:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK7-NEXT:    br i1 [[TOBOOL21]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK7-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK7-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK7:       omp_if.then:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND22:%.*]]
-// CHECK7:       omp.inner.for.cond22:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND21:%.*]]
+// CHECK7:       omp.inner.for.cond21:
 // CHECK7-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
 // CHECK7-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK7-NEXT:    br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
-// CHECK7:       omp.inner.for.body24:
+// CHECK7-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK7-NEXT:    br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
+// CHECK7:       omp.inner.for.body23:
 // CHECK7-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK7-NEXT:    [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK7-NEXT:    store i32 [[ADD26]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK7-NEXT:    [[MUL24:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK7-NEXT:    [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
+// CHECK7-NEXT:    store i32 [[ADD25]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK7-NEXT:    call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE27:%.*]]
-// CHECK7:       omp.body.continue27:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC28:%.*]]
-// CHECK7:       omp.inner.for.inc28:
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE26:%.*]]
+// CHECK7:       omp.body.continue26:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC27:%.*]]
+// CHECK7:       omp.inner.for.inc27:
 // CHECK7-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK7-NEXT:    store i32 [[ADD29]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
-// CHECK7:       omp.inner.for.end30:
+// CHECK7-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK7-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK7:       omp.inner.for.end29:
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK7:       omp_if.else:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND31:%.*]]
-// CHECK7:       omp.inner.for.cond31:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
+// CHECK7:       omp.inner.for.cond30:
 // CHECK7-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
 // CHECK7-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4
-// CHECK7-NEXT:    [[CMP32:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
-// CHECK7-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END39:%.*]]
-// CHECK7:       omp.inner.for.body33:
+// CHECK7-NEXT:    [[CMP31:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// CHECK7-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END38:%.*]]
+// CHECK7:       omp.inner.for.body32:
 // CHECK7-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[TMP19]], 1
-// CHECK7-NEXT:    [[ADD35:%.*]] = add nsw i32 0, [[MUL34]]
-// CHECK7-NEXT:    store i32 [[ADD35]], ptr [[I20]], align 4
+// CHECK7-NEXT:    [[MUL33:%.*]] = mul nsw i32 [[TMP19]], 1
+// CHECK7-NEXT:    [[ADD34:%.*]] = add nsw i32 0, [[MUL33]]
+// CHECK7-NEXT:    store i32 [[ADD34]], ptr [[I20]], align 4
 // CHECK7-NEXT:    call void @_Z3fn6v()
-// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE36:%.*]]
-// CHECK7:       omp.body.continue36:
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC37:%.*]]
-// CHECK7:       omp.inner.for.inc37:
+// CHECK7-NEXT:    br label [[OMP_BODY_CONTINUE35:%.*]]
+// CHECK7:       omp.body.continue35:
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_INC36:%.*]]
+// CHECK7:       omp.inner.for.inc36:
 // CHECK7-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK7-NEXT:    store i32 [[ADD38]], ptr [[DOTOMP_IV19]], align 4
-// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND31]], !llvm.loop [[LOOP17:![0-9]+]]
-// CHECK7:       omp.inner.for.end39:
+// CHECK7-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK7-NEXT:    store i32 [[ADD37]], ptr [[DOTOMP_IV19]], align 4
+// CHECK7-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK7:       omp.inner.for.end38:
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
 // CHECK7-NEXT:    store i32 100, ptr [[I20]], align 4
@@ -4180,8 +4180,8 @@ int main() {
 // CHECK7-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK7-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK7-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK7-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK7-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK7-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK7-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK7-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -4608,8 +4608,8 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK9-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -4690,45 +4690,45 @@ int main() {
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = load i32, ptr @Arg, align 4
 // CHECK9-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK9-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK9-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK9-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK9-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK9-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK9-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK9-NEXT:    store ptr @.offload_sizes, ptr [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK9-NEXT:    store ptr @.offload_maptypes, ptr [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK9-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK9-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK9-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK9-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK9-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK9-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK9-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK9-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK9-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK9-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK9-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK9:       omp_offload.failed8:
+// CHECK9-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK9:       omp_offload.failed7:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK9:       omp_offload.cont9:
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK9:       omp_offload.cont8:
 // CHECK9-NEXT:    [[TMP56:%.*]] = load i32, ptr @Arg, align 4
 // CHECK9-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP56]])
 // CHECK9-NEXT:    ret i32 [[CALL]]
@@ -5052,12 +5052,12 @@ int main() {
 // CHECK9-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK9-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK9-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK9-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK9-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK9-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP2]])
 // CHECK9-NEXT:    ret void
@@ -5112,8 +5112,8 @@ int main() {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]]
 // CHECK9-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP38]]
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK9-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK9:       omp_if.then:
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP38]]
 // CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -5237,8 +5237,8 @@ int main() {
 // CHECK9-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK9-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK9-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK9-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK9-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -5319,45 +5319,45 @@ int main() {
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK9-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK9-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK9-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK9-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK9-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK9-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK9-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK9-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK9-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK9-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK9-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK9-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK9-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK9-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK9-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK9-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK9-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK9-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK9-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK9-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK9-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK9-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK9:       omp_offload.failed8:
+// CHECK9-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK9:       omp_offload.failed7:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK9:       omp_offload.cont9:
+// CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK9:       omp_offload.cont8:
 // CHECK9-NEXT:    ret i32 0
 //
 //
@@ -5679,12 +5679,12 @@ int main() {
 // CHECK9-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK9-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK9-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK9-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK9-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK9-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK9-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK9-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK9-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK9-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK9-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined, i64 [[TMP2]])
 // CHECK9-NEXT:    ret void
@@ -5739,8 +5739,8 @@ int main() {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]]
 // CHECK9-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK9-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP56]]
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK9-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK9-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK9:       omp_if.then:
 // CHECK9-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP56]]
 // CHECK9-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -6248,8 +6248,8 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK11-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -6330,45 +6330,45 @@ int main() {
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP37:%.*]] = load i32, ptr @Arg, align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK11-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK11-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK11-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK11-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK11-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK11-NEXT:    store ptr @.offload_sizes, ptr [[TMP45]], align 8
-// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK11-NEXT:    store ptr @.offload_maptypes, ptr [[TMP46]], align 8
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK11-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK11-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK11-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK11-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK11-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK11-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK11-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK11-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK11-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK11-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK11:       omp_offload.failed8:
+// CHECK11-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK11:       omp_offload.failed7:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK11:       omp_offload.cont9:
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK11:       omp_offload.cont8:
 // CHECK11-NEXT:    [[TMP56:%.*]] = load i32, ptr @Arg, align 4
 // CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP56]])
 // CHECK11-NEXT:    ret i32 [[CALL]]
@@ -6692,12 +6692,12 @@ int main() {
 // CHECK11-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK11-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK11-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined, i64 [[TMP2]])
 // CHECK11-NEXT:    ret void
@@ -6744,8 +6744,8 @@ int main() {
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK11:       omp.inner.for.cond:
@@ -6759,13 +6759,13 @@ int main() {
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK11-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
+// CHECK11-NEXT:    [[LOADEDV2:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]]
-// CHECK11-NEXT:    [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV3:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then4:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP35]]
 // CHECK11-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -6799,13 +6799,13 @@ int main() {
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
 // CHECK11-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP22]] to i64
 // CHECK11-NEXT:    [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1
-// CHECK11-NEXT:    [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
+// CHECK11-NEXT:    [[LOADEDV9:%.*]] = trunc i8 [[TMP24]] to i1
+// CHECK11-NEXT:    [[STOREDV11:%.*]] = zext i1 [[LOADEDV9]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1
 // CHECK11-NEXT:    [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8
 // CHECK11-NEXT:    [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
+// CHECK11-NEXT:    [[LOADEDV12:%.*]] = trunc i8 [[TMP26]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]]
 // CHECK11:       omp_if.then13:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l92.omp_outlined.omp_outlined.1, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]])
 // CHECK11-NEXT:    br label [[OMP_IF_END16:%.*]]
@@ -6871,8 +6871,8 @@ int main() {
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -6998,8 +6998,8 @@ int main() {
 // CHECK11-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 // CHECK11-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
@@ -7107,8 +7107,8 @@ int main() {
 // CHECK11-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
 // CHECK11-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1
-// CHECK11-NEXT:    [[_TMP6:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK11-NEXT:    [[_TMP5:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
 // CHECK11-NEXT:    store i32 [[ARG]], ptr [[ARG_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP0]], align 4
@@ -7189,45 +7189,45 @@ int main() {
 // CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP37:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    [[TMP38:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP38]] to i1
-// CHECK11-NEXT:    [[TMP39:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP38]] to i1
+// CHECK11-NEXT:    [[TMP39:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
 // CHECK11-NEXT:    store i32 3, ptr [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
 // CHECK11-NEXT:    store i32 1, ptr [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
 // CHECK11-NEXT:    store ptr [[TMP35]], ptr [[TMP43]], align 8
-// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
 // CHECK11-NEXT:    store ptr [[TMP36]], ptr [[TMP44]], align 8
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
 // CHECK11-NEXT:    store ptr @.offload_sizes.2, ptr [[TMP45]], align 8
-// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
 // CHECK11-NEXT:    store ptr @.offload_maptypes.3, ptr [[TMP46]], align 8
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
 // CHECK11-NEXT:    store ptr null, ptr [[TMP47]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
 // CHECK11-NEXT:    store ptr null, ptr [[TMP48]], align 8
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
 // CHECK11-NEXT:    store i64 100, ptr [[TMP49]], align 8
-// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK11-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
 // CHECK11-NEXT:    store i64 0, ptr [[TMP50]], align 8
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
 // CHECK11-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP51]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
 // CHECK11-NEXT:    store [3 x i32] [[TMP40]], ptr [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
 // CHECK11-NEXT:    store i32 0, ptr [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS7]])
+// CHECK11-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP39]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.region_id, ptr [[KERNEL_ARGS6]])
 // CHECK11-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK11-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
-// CHECK11:       omp_offload.failed8:
+// CHECK11-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK11:       omp_offload.failed7:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67(i64 [[TMP31]]) #[[ATTR2]]
-// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT9]]
-// CHECK11:       omp_offload.cont9:
+// CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT8]]
+// CHECK11:       omp_offload.cont8:
 // CHECK11-NEXT:    ret i32 0
 //
 //
@@ -7549,12 +7549,12 @@ int main() {
 // CHECK11-NEXT:    store i64 [[ARG]], ptr [[ARG_ADDR]], align 8
 // CHECK11-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK11-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-// CHECK11-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK11-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK11-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1
-// CHECK11-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK11-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
+// CHECK11-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK11-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined, i64 [[TMP2]])
 // CHECK11-NEXT:    ret void
@@ -7609,8 +7609,8 @@ int main() {
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]]
 // CHECK11-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP55]]
-// CHECK11-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK11-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK11-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK11-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK11:       omp_if.then:
 // CHECK11-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP55]]
 // CHECK11-NEXT:    br label [[OMP_IF_END:%.*]]
@@ -7863,8 +7863,8 @@ int main() {
 // CHECK13-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK13-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK13-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK13-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK13-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK13-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK13-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK13-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -7971,8 +7971,8 @@ int main() {
 // CHECK13-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK13-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK13-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK13-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK13-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK13-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK13-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK13-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
@@ -8144,60 +8144,60 @@ int main() {
 // CHECK15-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, ptr @Arg, align 4
 // CHECK15-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK15-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK15-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK15-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK15-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK15-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 [[TMP11]], ptr [[DOTOMP_IV19]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK15-NEXT:    [[TOBOOL21:%.*]] = trunc i8 [[TMP12]] to i1
-// CHECK15-NEXT:    br i1 [[TOBOOL21]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK15-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP12]] to i1
+// CHECK15-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK15:       omp_if.then:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND22:%.*]]
-// CHECK15:       omp.inner.for.cond22:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND21:%.*]]
+// CHECK15:       omp.inner.for.cond21:
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
 // CHECK15-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
-// CHECK15-NEXT:    br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]]
-// CHECK15:       omp.inner.for.body24:
+// CHECK15-NEXT:    [[CMP22:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK15-NEXT:    br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]]
+// CHECK15:       omp.inner.for.body23:
 // CHECK15-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK15-NEXT:    [[ADD26:%.*]] = add nsw i32 0, [[MUL25]]
-// CHECK15-NEXT:    store i32 [[ADD26]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK15-NEXT:    [[MUL24:%.*]] = mul nsw i32 [[TMP15]], 1
+// CHECK15-NEXT:    [[ADD25:%.*]] = add nsw i32 0, [[MUL24]]
+// CHECK15-NEXT:    store i32 [[ADD25]], ptr [[I20]], align 4, !llvm.access.group [[ACC_GRP14]]
 // CHECK15-NEXT:    call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE27:%.*]]
-// CHECK15:       omp.body.continue27:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC28:%.*]]
-// CHECK15:       omp.inner.for.inc28:
+// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE26:%.*]]
+// CHECK15:       omp.body.continue26:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC27:%.*]]
+// CHECK15:       omp.inner.for.inc27:
 // CHECK15-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK15-NEXT:    store i32 [[ADD29]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]]
-// CHECK15:       omp.inner.for.end30:
+// CHECK15-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK15-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV19]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP15:![0-9]+]]
+// CHECK15:       omp.inner.for.end29:
 // CHECK15-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK15:       omp_if.else:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND31:%.*]]
-// CHECK15:       omp.inner.for.cond31:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
+// CHECK15:       omp.inner.for.cond30:
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB18]], align 4
-// CHECK15-NEXT:    [[CMP32:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
-// CHECK15-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END39:%.*]]
-// CHECK15:       omp.inner.for.body33:
+// CHECK15-NEXT:    [[CMP31:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
+// CHECK15-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END38:%.*]]
+// CHECK15:       omp.inner.for.body32:
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[TMP19]], 1
-// CHECK15-NEXT:    [[ADD35:%.*]] = add nsw i32 0, [[MUL34]]
-// CHECK15-NEXT:    store i32 [[ADD35]], ptr [[I20]], align 4
+// CHECK15-NEXT:    [[MUL33:%.*]] = mul nsw i32 [[TMP19]], 1
+// CHECK15-NEXT:    [[ADD34:%.*]] = add nsw i32 0, [[MUL33]]
+// CHECK15-NEXT:    store i32 [[ADD34]], ptr [[I20]], align 4
 // CHECK15-NEXT:    call void @_Z3fn6v()
-// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE36:%.*]]
-// CHECK15:       omp.body.continue36:
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC37:%.*]]
-// CHECK15:       omp.inner.for.inc37:
+// CHECK15-NEXT:    br label [[OMP_BODY_CONTINUE35:%.*]]
+// CHECK15:       omp.body.continue35:
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC36:%.*]]
+// CHECK15:       omp.inner.for.inc36:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK15-NEXT:    store i32 [[ADD38]], ptr [[DOTOMP_IV19]], align 4
-// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND31]], !llvm.loop [[LOOP17:![0-9]+]]
-// CHECK15:       omp.inner.for.end39:
+// CHECK15-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK15-NEXT:    store i32 [[ADD37]], ptr [[DOTOMP_IV19]], align 4
+// CHECK15-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP17:![0-9]+]]
+// CHECK15:       omp.inner.for.end38:
 // CHECK15-NEXT:    br label [[OMP_IF_END]]
 // CHECK15:       omp_if.end:
 // CHECK15-NEXT:    store i32 100, ptr [[I20]], align 4
@@ -8281,8 +8281,8 @@ int main() {
 // CHECK15-NEXT:    store i32 100, ptr [[I6]], align 4
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARG_ADDR]], align 4
 // CHECK15-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP10]], 0
-// CHECK15-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK15-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK15-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK15-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK15-NEXT:    store i32 0, ptr [[DOTOMP_LB17]], align 4
 // CHECK15-NEXT:    store i32 99, ptr [[DOTOMP_UB18]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB17]], align 4
diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index 2fab0cff55373..d8724dd21b783 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -2397,12 +2397,12 @@ int main (int argc, char **argv) {
 // CHECK21-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1
 // CHECK21-NEXT:    [[TMP1:%.*]] = load float, ptr [[B]], align 4
 // CHECK21-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
-// CHECK21-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK21-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK21-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK21-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK21-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK21-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK21-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK21-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK21-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK21-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK21-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK21-NEXT:    [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK21-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.omp_outlined, ptr [[TMP0]], i64 [[TMP3]])
 // CHECK21-NEXT:    ret void
@@ -2448,8 +2448,8 @@ int main (int argc, char **argv) {
 // CHECK21-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK21-NEXT:    store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
 // CHECK21-NEXT:    [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK21-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1
-// CHECK21-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK21-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1
+// CHECK21-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK21:       omp_if.then:
 // CHECK21-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK21:       omp.inner.for.cond:
@@ -2625,12 +2625,12 @@ int main (int argc, char **argv) {
 // CHECK23-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1
 // CHECK23-NEXT:    [[TMP1:%.*]] = load float, ptr [[B]], align 4
 // CHECK23-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
-// CHECK23-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK23-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK23-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK23-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK23-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK23-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK23-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK23-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK23-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK23-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK23-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK23-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK23-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.omp_outlined, ptr [[TMP0]], i32 [[TMP3]])
 // CHECK23-NEXT:    ret void
@@ -2676,8 +2676,8 @@ int main (int argc, char **argv) {
 // CHECK23-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK23-NEXT:    store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
 // CHECK23-NEXT:    [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK23-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1
-// CHECK23-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK23-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1
+// CHECK23-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK23:       omp_if.then:
 // CHECK23-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK23:       omp.inner.for.cond:
@@ -2888,15 +2888,15 @@ int main (int argc, char **argv) {
 // CHECK29-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 1
 // CHECK29-NEXT:    [[TMP0:%.*]] = load float, ptr [[B]], align 4
 // CHECK29-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP0]], 0.000000e+00
-// CHECK29-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK29-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK29-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK29-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK29-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
 // CHECK29-NEXT:    store i32 122, ptr [[DOTOMP_UB]], align 4
 // CHECK29-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK29-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
 // CHECK29-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK29-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK29-NEXT:    br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK29-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK29-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK29:       omp_if.then:
 // CHECK29-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK29:       omp.inner.for.cond:
@@ -2909,8 +2909,8 @@ int main (int argc, char **argv) {
 // CHECK29-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
 // CHECK29-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK29-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
-// CHECK29-NEXT:    [[B3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
-// CHECK29-NEXT:    [[TMP6:%.*]] = load float, ptr [[B3]], align 4, !nontemporal [[META3:![0-9]+]], !llvm.access.group [[ACC_GRP2]]
+// CHECK29-NEXT:    [[B2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
+// CHECK29-NEXT:    [[TMP6:%.*]] = load float, ptr [[B2]], align 4, !nontemporal [[META3:![0-9]+]], !llvm.access.group [[ACC_GRP2]]
 // CHECK29-NEXT:    [[CONV:%.*]] = fptosi float [[TMP6]] to i32
 // CHECK29-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
 // CHECK29-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
@@ -2922,46 +2922,46 @@ int main (int argc, char **argv) {
 // CHECK29-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK29:       omp.inner.for.inc:
 // CHECK29-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
-// CHECK29-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK29-NEXT:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK29-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK29-NEXT:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
 // CHECK29-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // CHECK29:       omp.inner.for.end:
 // CHECK29-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK29:       omp_if.else:
-// CHECK29-NEXT:    br label [[OMP_INNER_FOR_COND5:%.*]]
-// CHECK29:       omp.inner.for.cond5:
+// CHECK29-NEXT:    br label [[OMP_INNER_FOR_COND4:%.*]]
+// CHECK29:       omp.inner.for.cond4:
 // CHECK29-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK29-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK29-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK29-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY7:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
-// CHECK29:       omp.inner.for.body7:
+// CHECK29-NEXT:    [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK29-NEXT:    br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END17:%.*]]
+// CHECK29:       omp.inner.for.body6:
 // CHECK29-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK29-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP11]], 1
-// CHECK29-NEXT:    [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
-// CHECK29-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
-// CHECK29-NEXT:    [[B10:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
-// CHECK29-NEXT:    [[TMP12:%.*]] = load float, ptr [[B10]], align 4
-// CHECK29-NEXT:    [[CONV11:%.*]] = fptosi float [[TMP12]] to i32
-// CHECK29-NEXT:    [[A12:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
+// CHECK29-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK29-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
+// CHECK29-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+// CHECK29-NEXT:    [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
+// CHECK29-NEXT:    [[TMP12:%.*]] = load float, ptr [[B9]], align 4
+// CHECK29-NEXT:    [[CONV10:%.*]] = fptosi float [[TMP12]] to i32
+// CHECK29-NEXT:    [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
 // CHECK29-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK29-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK29-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [123 x i32], ptr [[A12]], i64 0, i64 [[IDXPROM13]]
-// CHECK29-NEXT:    store i32 [[CONV11]], ptr [[ARRAYIDX14]], align 4
-// CHECK29-NEXT:    br label [[OMP_BODY_CONTINUE15:%.*]]
-// CHECK29:       omp.body.continue15:
-// CHECK29-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
-// CHECK29:       omp.inner.for.inc16:
+// CHECK29-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64
+// CHECK29-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [123 x i32], ptr [[A11]], i64 0, i64 [[IDXPROM12]]
+// CHECK29-NEXT:    store i32 [[CONV10]], ptr [[ARRAYIDX13]], align 4
+// CHECK29-NEXT:    br label [[OMP_BODY_CONTINUE14:%.*]]
+// CHECK29:       omp.body.continue14:
+// CHECK29-NEXT:    br label [[OMP_INNER_FOR_INC15:%.*]]
+// CHECK29:       omp.inner.for.inc15:
 // CHECK29-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK29-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK29-NEXT:    store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4
-// CHECK29-NEXT:    br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK29:       omp.inner.for.end18:
+// CHECK29-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK29-NEXT:    store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4
+// CHECK29-NEXT:    br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK29:       omp.inner.for.end17:
 // CHECK29-NEXT:    br label [[OMP_IF_END]]
 // CHECK29:       omp_if.end:
 // CHECK29-NEXT:    store i32 123, ptr [[I]], align 4
-// CHECK29-NEXT:    [[A19:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
-// CHECK29-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [123 x i32], ptr [[A19]], i64 0, i64 0
-// CHECK29-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
+// CHECK29-NEXT:    [[A18:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
+// CHECK29-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds [123 x i32], ptr [[A18]], i64 0, i64 0
+// CHECK29-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4
 // CHECK29-NEXT:    ret i32 [[TMP15]]
 //
 //
@@ -2988,15 +2988,15 @@ int main (int argc, char **argv) {
 // CHECK31-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 1
 // CHECK31-NEXT:    [[TMP0:%.*]] = load float, ptr [[B]], align 4
 // CHECK31-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP0]], 0.000000e+00
-// CHECK31-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK31-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK31-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK31-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK31-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
 // CHECK31-NEXT:    store i32 122, ptr [[DOTOMP_UB]], align 4
 // CHECK31-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK31-NEXT:    store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4
 // CHECK31-NEXT:    [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK31-NEXT:    [[TOBOOL2:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK31-NEXT:    br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK31-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK31-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK31:       omp_if.then:
 // CHECK31-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK31:       omp.inner.for.cond:
@@ -3009,8 +3009,8 @@ int main (int argc, char **argv) {
 // CHECK31-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
 // CHECK31-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK31-NEXT:    store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
-// CHECK31-NEXT:    [[B3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
-// CHECK31-NEXT:    [[TMP6:%.*]] = load float, ptr [[B3]], align 4, !nontemporal [[META4:![0-9]+]], !llvm.access.group [[ACC_GRP3]]
+// CHECK31-NEXT:    [[B2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
+// CHECK31-NEXT:    [[TMP6:%.*]] = load float, ptr [[B2]], align 4, !nontemporal [[META4:![0-9]+]], !llvm.access.group [[ACC_GRP3]]
 // CHECK31-NEXT:    [[CONV:%.*]] = fptosi float [[TMP6]] to i32
 // CHECK31-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
 // CHECK31-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]]
@@ -3021,45 +3021,45 @@ int main (int argc, char **argv) {
 // CHECK31-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK31:       omp.inner.for.inc:
 // CHECK31-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// CHECK31-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK31-NEXT:    store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK31-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK31-NEXT:    store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK31-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
 // CHECK31:       omp.inner.for.end:
 // CHECK31-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK31:       omp_if.else:
-// CHECK31-NEXT:    br label [[OMP_INNER_FOR_COND5:%.*]]
-// CHECK31:       omp.inner.for.cond5:
+// CHECK31-NEXT:    br label [[OMP_INNER_FOR_COND4:%.*]]
+// CHECK31:       omp.inner.for.cond4:
 // CHECK31-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK31-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK31-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK31-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY7:%.*]], label [[OMP_INNER_FOR_END17:%.*]]
-// CHECK31:       omp.inner.for.body7:
+// CHECK31-NEXT:    [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK31-NEXT:    br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END16:%.*]]
+// CHECK31:       omp.inner.for.body6:
 // CHECK31-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK31-NEXT:    [[MUL8:%.*]] = mul nsw i32 [[TMP11]], 1
-// CHECK31-NEXT:    [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
-// CHECK31-NEXT:    store i32 [[ADD9]], ptr [[I]], align 4
-// CHECK31-NEXT:    [[B10:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
-// CHECK31-NEXT:    [[TMP12:%.*]] = load float, ptr [[B10]], align 4
-// CHECK31-NEXT:    [[CONV11:%.*]] = fptosi float [[TMP12]] to i32
-// CHECK31-NEXT:    [[A12:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
+// CHECK31-NEXT:    [[MUL7:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK31-NEXT:    [[ADD8:%.*]] = add nsw i32 0, [[MUL7]]
+// CHECK31-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+// CHECK31-NEXT:    [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1
+// CHECK31-NEXT:    [[TMP12:%.*]] = load float, ptr [[B9]], align 4
+// CHECK31-NEXT:    [[CONV10:%.*]] = fptosi float [[TMP12]] to i32
+// CHECK31-NEXT:    [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
 // CHECK31-NEXT:    [[TMP13:%.*]] = load i32, ptr [[I]], align 4
-// CHECK31-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [123 x i32], ptr [[A12]], i32 0, i32 [[TMP13]]
-// CHECK31-NEXT:    store i32 [[CONV11]], ptr [[ARRAYIDX13]], align 4
-// CHECK31-NEXT:    br label [[OMP_BODY_CONTINUE14:%.*]]
-// CHECK31:       omp.body.continue14:
-// CHECK31-NEXT:    br label [[OMP_INNER_FOR_INC15:%.*]]
-// CHECK31:       omp.inner.for.inc15:
+// CHECK31-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], ptr [[A11]], i32 0, i32 [[TMP13]]
+// CHECK31-NEXT:    store i32 [[CONV10]], ptr [[ARRAYIDX12]], align 4
+// CHECK31-NEXT:    br label [[OMP_BODY_CONTINUE13:%.*]]
+// CHECK31:       omp.body.continue13:
+// CHECK31-NEXT:    br label [[OMP_INNER_FOR_INC14:%.*]]
+// CHECK31:       omp.inner.for.inc14:
 // CHECK31-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK31-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK31-NEXT:    store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4
-// CHECK31-NEXT:    br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK31:       omp.inner.for.end17:
+// CHECK31-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK31-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4
+// CHECK31-NEXT:    br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK31:       omp.inner.for.end16:
 // CHECK31-NEXT:    br label [[OMP_IF_END]]
 // CHECK31:       omp_if.end:
 // CHECK31-NEXT:    store i32 123, ptr [[I]], align 4
-// CHECK31-NEXT:    [[A18:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
-// CHECK31-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds [123 x i32], ptr [[A18]], i32 0, i32 0
-// CHECK31-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4
+// CHECK31-NEXT:    [[A17:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0
+// CHECK31-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [123 x i32], ptr [[A17]], i32 0, i32 0
+// CHECK31-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
 // CHECK31-NEXT:    ret i32 [[TMP15]]
 //
 //
@@ -4010,12 +4010,12 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
 // CHECK37-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK37-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK37-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK37-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK37-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK37-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK37-NEXT:    [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK37-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK37-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK37-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK37-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK37-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK37-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK37-NEXT:    [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
 // CHECK37-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]])
 // CHECK37-NEXT:    ret void
@@ -4085,8 +4085,8 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK37-NEXT:    store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
 // CHECK37-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK37-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK37-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK37-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK37-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK37:       omp_if.then:
 // CHECK37-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK37:       omp.inner.for.cond:
@@ -4474,12 +4474,12 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
 // CHECK39-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK39-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK39-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK39-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK39-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK39-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK39-NEXT:    [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK39-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK39-NEXT:    [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8
-// CHECK39-NEXT:    store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
+// CHECK39-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK39-NEXT:    [[STOREDV1:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK39-NEXT:    store i8 [[STOREDV1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
 // CHECK39-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK39-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]])
 // CHECK39-NEXT:    ret void
@@ -4549,8 +4549,8 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK39-NEXT:    store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
 // CHECK39-NEXT:    [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK39-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1
-// CHECK39-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK39-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1
+// CHECK39-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK39:       omp_if.then:
 // CHECK39-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK39:       omp.inner.for.cond:
@@ -5091,8 +5091,8 @@ int main (int argc, char **argv) {
 // CHECK45-NEXT:    store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8
 // CHECK45-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK45-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0
-// CHECK45-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK45-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK45-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK45-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK45-NEXT:    [[TMP4:%.*]] = load i32, ptr [[N]], align 4
 // CHECK45-NEXT:    store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK45-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -5111,15 +5111,15 @@ int main (int argc, char **argv) {
 // CHECK45-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK45-NEXT:    store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
 // CHECK45-NEXT:    [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK45-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1
-// CHECK45-NEXT:    br i1 [[TOBOOL5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK45-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP9]] to i1
+// CHECK45-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK45:       omp_if.then:
 // CHECK45-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK45:       omp.inner.for.cond:
 // CHECK45-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]]
 // CHECK45-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]]
-// CHECK45-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
-// CHECK45-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK45-NEXT:    [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]]
+// CHECK45-NEXT:    br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK45:       omp.inner.for.body:
 // CHECK45-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
 // CHECK45-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
@@ -5134,44 +5134,44 @@ int main (int argc, char **argv) {
 // CHECK45-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK45:       omp.inner.for.inc:
 // CHECK45-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
-// CHECK45-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK45-NEXT:    store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
+// CHECK45-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1
+// CHECK45-NEXT:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
 // CHECK45-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
 // CHECK45:       omp.inner.for.end:
 // CHECK45-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK45:       omp_if.else:
-// CHECK45-NEXT:    br label [[OMP_INNER_FOR_COND8:%.*]]
-// CHECK45:       omp.inner.for.cond8:
+// CHECK45-NEXT:    br label [[OMP_INNER_FOR_COND7:%.*]]
+// CHECK45:       omp.inner.for.cond7:
 // CHECK45-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK45-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-NEXT:    [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
-// CHECK45-NEXT:    br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END18:%.*]]
-// CHECK45:       omp.inner.for.body10:
+// CHECK45-NEXT:    [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
+// CHECK45-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END17:%.*]]
+// CHECK45:       omp.inner.for.body9:
 // CHECK45-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP17]], 1
-// CHECK45-NEXT:    [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK45-NEXT:    store i32 [[ADD12]], ptr [[I4]], align 4
+// CHECK45-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[TMP17]], 1
+// CHECK45-NEXT:    [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
+// CHECK45-NEXT:    store i32 [[ADD11]], ptr [[I4]], align 4
 // CHECK45-NEXT:    [[TMP18:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK45-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
-// CHECK45-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[IDXPROM13]]
-// CHECK45-NEXT:    store i32 0, ptr [[ARRAYIDX14]], align 4
-// CHECK45-NEXT:    br label [[OMP_BODY_CONTINUE15:%.*]]
-// CHECK45:       omp.body.continue15:
-// CHECK45-NEXT:    br label [[OMP_INNER_FOR_INC16:%.*]]
-// CHECK45:       omp.inner.for.inc16:
+// CHECK45-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[TMP18]] to i64
+// CHECK45-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[IDXPROM12]]
+// CHECK45-NEXT:    store i32 0, ptr [[ARRAYIDX13]], align 4
+// CHECK45-NEXT:    br label [[OMP_BODY_CONTINUE14:%.*]]
+// CHECK45:       omp.body.continue14:
+// CHECK45-NEXT:    br label [[OMP_INNER_FOR_INC15:%.*]]
+// CHECK45:       omp.inner.for.inc15:
 // CHECK45-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-NEXT:    store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-NEXT:    br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP6:![0-9]+]]
-// CHECK45:       omp.inner.for.end18:
+// CHECK45-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK45-NEXT:    store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4
+// CHECK45-NEXT:    br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK45:       omp.inner.for.end17:
 // CHECK45-NEXT:    br label [[OMP_IF_END]]
 // CHECK45:       omp_if.end:
 // CHECK45-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-NEXT:    [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0
-// CHECK45-NEXT:    [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
-// CHECK45-NEXT:    [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1
-// CHECK45-NEXT:    [[ADD22:%.*]] = add nsw i32 0, [[MUL21]]
-// CHECK45-NEXT:    store i32 [[ADD22]], ptr [[I4]], align 4
+// CHECK45-NEXT:    [[SUB18:%.*]] = sub nsw i32 [[TMP20]], 0
+// CHECK45-NEXT:    [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1
+// CHECK45-NEXT:    [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1
+// CHECK45-NEXT:    [[ADD21:%.*]] = add nsw i32 0, [[MUL20]]
+// CHECK45-NEXT:    store i32 [[ADD21]], ptr [[I4]], align 4
 // CHECK45-NEXT:    br label [[SIMD_IF_END]]
 // CHECK45:       simd.if.end:
 // CHECK45-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
@@ -5259,8 +5259,8 @@ int main (int argc, char **argv) {
 // CHECK47-NEXT:    store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4
 // CHECK47-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK47-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
-// CHECK47-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK47-NEXT:    store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK47-NEXT:    [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8
+// CHECK47-NEXT:    store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1
 // CHECK47-NEXT:    [[TMP3:%.*]] = load i32, ptr [[N]], align 4
 // CHECK47-NEXT:    store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4
 // CHECK47-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -5279,15 +5279,15 @@ int main (int argc, char **argv) {
 // CHECK47-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
 // CHECK47-NEXT:    store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
 // CHECK47-NEXT:    [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK47-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP8]] to i1
-// CHECK47-NEXT:    br i1 [[TOBOOL5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK47-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1
+// CHECK47-NEXT:    br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK47:       omp_if.then:
 // CHECK47-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK47:       omp.inner.for.cond:
 // CHECK47-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
 // CHECK47-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]]
-// CHECK47-NEXT:    [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK47-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK47-NEXT:    [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK47-NEXT:    br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK47:       omp.inner.for.body:
 // CHECK47-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK47-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
@@ -5301,43 +5301,43 @@ int main (int argc, char **argv) {
 // CHECK47-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK47:       omp.inner.for.inc:
 // CHECK47-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
-// CHECK47-NEXT:    [[ADD7:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK47-NEXT:    store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK47-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
+// CHECK47-NEXT:    store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK47-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // CHECK47:       omp.inner.for.end:
 // CHECK47-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK47:       omp_if.else:
-// CHECK47-NEXT:    br label [[OMP_INNER_FOR_COND8:%.*]]
-// CHECK47:       omp.inner.for.cond8:
+// CHECK47-NEXT:    br label [[OMP_INNER_FOR_COND7:%.*]]
+// CHECK47:       omp.inner.for.cond7:
 // CHECK47-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
 // CHECK47-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK47-NEXT:    [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
-// CHECK47-NEXT:    br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END17:%.*]]
-// CHECK47:       omp.inner.for.body10:
+// CHECK47-NEXT:    [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
+// CHECK47-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END16:%.*]]
+// CHECK47:       omp.inner.for.body9:
 // CHECK47-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK47-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[TMP16]], 1
-// CHECK47-NEXT:    [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK47-NEXT:    store i32 [[ADD12]], ptr [[I4]], align 4
+// CHECK47-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[TMP16]], 1
+// CHECK47-NEXT:    [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
+// CHECK47-NEXT:    store i32 [[ADD11]], ptr [[I4]], align 4
 // CHECK47-NEXT:    [[TMP17:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK47-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP17]]
-// CHECK47-NEXT:    store i32 0, ptr [[ARRAYIDX13]], align 4
-// CHECK47-NEXT:    br label [[OMP_BODY_CONTINUE14:%.*]]
-// CHECK47:       omp.body.continue14:
-// CHECK47-NEXT:    br label [[OMP_INNER_FOR_INC15:%.*]]
-// CHECK47:       omp.inner.for.inc15:
+// CHECK47-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP17]]
+// CHECK47-NEXT:    store i32 0, ptr [[ARRAYIDX12]], align 4
+// CHECK47-NEXT:    br label [[OMP_BODY_CONTINUE13:%.*]]
+// CHECK47:       omp.body.continue13:
+// CHECK47-NEXT:    br label [[OMP_INNER_FOR_INC14:%.*]]
+// CHECK47:       omp.inner.for.inc14:
 // CHECK47-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK47-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK47-NEXT:    store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4
-// CHECK47-NEXT:    br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK47:       omp.inner.for.end17:
+// CHECK47-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK47-NEXT:    store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4
+// CHECK47-NEXT:    br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK47:       omp.inner.for.end16:
 // CHECK47-NEXT:    br label [[OMP_IF_END]]
 // CHECK47:       omp_if.end:
 // CHECK47-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK47-NEXT:    [[SUB18:%.*]] = sub nsw i32 [[TMP19]], 0
-// CHECK47-NEXT:    [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1
-// CHECK47-NEXT:    [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1
-// CHECK47-NEXT:    [[ADD21:%.*]] = add nsw i32 0, [[MUL20]]
-// CHECK47-NEXT:    store i32 [[ADD21]], ptr [[I4]], align 4
+// CHECK47-NEXT:    [[SUB17:%.*]] = sub nsw i32 [[TMP19]], 0
+// CHECK47-NEXT:    [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1
+// CHECK47-NEXT:    [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1
+// CHECK47-NEXT:    [[ADD20:%.*]] = add nsw i32 0, [[MUL19]]
+// CHECK47-NEXT:    store i32 [[ADD20]], ptr [[I4]], align 4
 // CHECK47-NEXT:    br label [[SIMD_IF_END]]
 // CHECK47:       simd.if.end:
 // CHECK47-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4

>From 15f680f5112176120d469afa8ea5251b9fb8e0c9 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Wed, 29 May 2024 06:51:04 -0700
Subject: [PATCH 03/21] Cleanup

---
 clang/lib/CodeGen/CGExpr.cpp       | 10 +++++-----
 clang/lib/CodeGen/CodeGenTypes.cpp |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 1bf2d188d4b8d..3642f303c96b8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1989,8 +1989,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
     return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal();
   }
 
-  Addr = Addr.withElementType(
-      CGM.getTypes().convertTypeForLoadStore(Ty, Addr.getElementType()));
+  Addr =
+      Addr.withElementType(convertTypeForLoadStore(Ty, Addr.getElementType()));
 
   llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
   if (isNontemporal) {
@@ -2108,7 +2108,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
     auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
     if (!CGM.getCodeGenOpts().PreserveVec3Type) {
       // Handle vec3 special.
-      if (!Ty->isExtVectorBoolType() && VecTy &&
+      if (!ClangVecTy->isExtVectorBoolType() && VecTy &&
           cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
         // Our source is a vec3, do a shuffle vector to make it a vec4.
         Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},
@@ -2468,8 +2468,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
 void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
                                                      llvm::Value **Result) {
   const CGBitFieldInfo &Info = Dst.getBitFieldInfo();
-  llvm::Type *ResLTy = CGM.getTypes().convertTypeForLoadStore(
-      Dst.getType(), ConvertType(Dst.getType()));
+  llvm::Type *ResLTy = convertTypeForLoadStore(Dst.getType());
   Address Ptr = Dst.getBitFieldAddress();
 
   // Get the source value, truncated to the width of the bit-field.
@@ -2537,6 +2536,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
         ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr");
       }
     }
+
     ResultVal = Builder.CreateIntCast(ResultVal, ResLTy, Info.IsSigned,
                                       "bf.result.cast");
     *Result = EmitFromMemory(ResultVal, Dst.getType());
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 5b92d7c5c2fb3..96b87a67324ce 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -120,7 +120,6 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
     return llvm::IntegerType::get(getLLVMContext(),
                                   (unsigned)Context.getTypeSize(T));
 
-
   // Else, don't map it.
   return R;
 }

>From 0ee57a3f46dddf381e16c021a2d6670f4db93f24 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 4 Jun 2024 10:04:39 -0700
Subject: [PATCH 04/21] Address some comments

---
 clang/lib/CodeGen/CGExpr.cpp       | 9 +++++++--
 clang/lib/CodeGen/CGExprScalar.cpp | 5 +++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 3642f303c96b8..bee3dac20ffa8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2014,6 +2014,9 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
   return EmitFromMemory(Load, Ty);
 }
 
+/// Converts a scalar value from its primary IR type (as returned
+/// by ConvertType) to its load/store type (as returned by
+/// convertTypeForLoadStore).
 llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
   if (hasBooleanRepresentation(Ty) ||
       (Ty->isBitIntType() && Value->getType()->isIntegerTy())) {
@@ -2035,6 +2038,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
   return Value;
 }
 
+/// Converts a scalar value from its load/store type (as returned
+/// by convertTypeForLoadStore) to its primary IR type (as returned
+/// by ConvertType).
 llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
   if (Ty->isExtVectorBoolType()) {
     const auto *RawIntTy = Value->getType();
@@ -2050,8 +2056,7 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
 
   if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
     llvm::Type *ResTy = ConvertType(Ty);
-    bool Signed = Ty->isSignedIntegerOrEnumerationType();
-    return Builder.CreateIntCast(Value, ResTy, Signed, "loadedv");
+    return Builder.CreateTrunc(Value, ResTy, "loadedv");
   }
 
   return Value;
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 75ea2308688f1..febd6fd1594fe 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -436,9 +436,10 @@ class ScalarExprEmitter
 
     if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
       if (E->isGLValue())
-        return CGF.Builder.CreateLoad(
+        return CGF.EmitLoadOfScalar(
             Address(Result, CGF.convertTypeForLoadStore(E->getType()),
-                    CGF.getContext().getTypeAlignInChars(E->getType())));
+                    CGF.getContext().getTypeAlignInChars(E->getType())),
+            /*Volatile*/ false, E->getType(), E->getExprLoc());
       return Result;
     }
     return Visit(E->getSubExpr());

>From 87bed2e55c4fabb15a1a11dd6fdea420e1cae8b8 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Wed, 5 Jun 2024 05:15:20 -0700
Subject: [PATCH 05/21] Move constant folding into AppendBitField

---
 clang/lib/CodeGen/CGExprConstant.cpp | 44 +++++++++++++++-------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 0277ea0f8d2c8..b0bb7734f3943 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -585,7 +585,7 @@ class ConstStructBuilder {
                    bool AllowOverwrite = false);
 
   bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset,
-                      llvm::ConstantInt *InitExpr, bool AllowOverwrite = false);
+                      llvm::Constant *InitExpr, bool AllowOverwrite = false);
 
   bool Build(const InitListExpr *ILE, bool AllowOverwrite);
   bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
@@ -610,8 +610,26 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,
 }
 
 bool ConstStructBuilder::AppendBitField(
-    const FieldDecl *Field, uint64_t FieldOffset, llvm::ConstantInt *CI,
+    const FieldDecl *Field, uint64_t FieldOffset, llvm::Constant *C,
     bool AllowOverwrite) {
+
+  llvm::ConstantInt *CI = nullptr;
+  if (!isa<llvm::ConstantInt>(C)) {
+    // Constants for long _BitInt types are split into individual bytes.
+    // Try to fold these back into an integer constant. If that doesn't work
+    // out, then we are trying to initialize a bitfield with a non-trivial
+    // constant, which must require run-time code.
+    llvm::Type *LoadType =
+        CGM.getTypes().convertTypeForLoadStore(Field->getType(), C->getType());
+    llvm::Constant *FoldedConstant = llvm::ConstantFoldLoadFromConst(
+        C, LoadType, llvm::APInt::getZero(32), CGM.getDataLayout());
+    CI = dyn_cast_if_present<llvm::ConstantInt>(FoldedConstant);
+    if (!CI)
+      return false;
+  } else {
+    CI = cast<llvm::ConstantInt>(C);
+  }
+
   const CGRecordLayout &RL =
       CGM.getTypes().getCGRecordLayout(Field->getParent());
   const CGBitFieldInfo &Info = RL.getBitFieldInfo(Field);
@@ -761,20 +779,10 @@ bool ConstStructBuilder::Build(const InitListExpr *ILE, bool AllowOverwrite) {
       if (Field->hasAttr<NoUniqueAddressAttr>())
         AllowOverwrite = true;
     } else {
-      llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
-          Field->getType(), EltInit->getType());
-      EltInit = llvm::ConstantFoldLoadFromConst(
-          EltInit, LoadType, llvm::APInt::getZero(32), CGM.getDataLayout());
       // Otherwise we have a bitfield.
-      if (auto *CI = dyn_cast<llvm::ConstantInt>(EltInit)) {
-        if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI,
-                            AllowOverwrite))
-          return false;
-      } else {
-        // We are trying to initialize a bitfield with a non-trivial constant,
-        // this must require run-time code.
+      if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), EltInit,
+                          AllowOverwrite))
         return false;
-      }
     }
   }
 
@@ -866,15 +874,9 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD,
       if (Field->hasAttr<NoUniqueAddressAttr>())
         AllowOverwrite = true;
     } else {
-      llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
-          Field->getType(), EltInit->getType());
-      llvm::ConstantInt *Init =
-          cast<llvm::ConstantInt>(llvm::ConstantFoldLoadFromConst(
-              EltInit, LoadType, llvm::APInt::getZero(32),
-              CGM.getDataLayout()));
       // Otherwise we have a bitfield.
       if (!AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits,
-                          Init, AllowOverwrite))
+                          EltInit, AllowOverwrite))
         return false;
     }
   }

>From 08482d88904d74513e9e12437a3ce02ddfac2adc Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 18 Jun 2024 02:57:11 -0700
Subject: [PATCH 06/21] EmitStoreOfScalar when emitting return value

---
 clang/lib/CodeGen/CGStmt.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index c95c0181e1351..07da2a767eade 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1539,14 +1539,11 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
     switch (getEvaluationKind(RV->getType())) {
     case TEK_Scalar: {
       llvm::Value *Ret = EmitScalarExpr(RV);
-      // EmitStoreOfScalar could be used here, but it extends bool which for
-      // some targets is returned as i1 zeroext.
-      if (RV->getType()->isBitIntType()) {
-        QualType Ty = RV->getType();
-        if (!CGM.getTypes().LLVMTypeLayoutMatchesAST(Ty, Ret->getType()))
-          Ret = EmitToMemory(Ret, Ty);
-      }
-      Builder.CreateStore(Ret, ReturnValue);
+      if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
+        EmitStoreOfScalar(Ret, MakeAddrLValue(ReturnValue, RV->getType()),
+                  /*isInit*/ true);
+      else
+        Builder.CreateStore(Ret, ReturnValue);
     } break;
     case TEK_Complex:
       EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()),

>From 956eeb641d236801eac7fcdde012b7af488f9e1d Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 18 Jun 2024 03:17:43 -0700
Subject: [PATCH 07/21] Rename LLVMTypeLayoutMatchesAST

---
 clang/lib/CodeGen/CGExprConstant.cpp |  5 +++--
 clang/lib/CodeGen/CodeGenTypes.cpp   |  8 ++++----
 clang/lib/CodeGen/CodeGenTypes.h     | 12 ++++++------
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index c2983c5200715..13f8eb2cafcef 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -617,7 +617,7 @@ bool ConstStructBuilder::AppendBitField(
   if (!isa<llvm::ConstantInt>(C)) {
     // Constants for long _BitInt types are split into individual bytes.
     // Try to fold these back into an integer constant. If that doesn't work
-    // out, we We are trying to initialize a bitfield with a non-trivial
+    // out, then we are trying to initialize a bitfield with a non-trivial
     // constant, this must require run-time code.
     llvm::Type *LoadType =
         CGM.getTypes().convertTypeForLoadStore(Field->getType(), C->getType());
@@ -1787,7 +1787,8 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
   }
 
   if (destType->isBitIntType()) {
-    if (!CGM.getTypes().LLVMTypeLayoutMatchesAST(destType, C->getType())) {
+    if (!CGM.getTypes().typeRequiresSplitIntoByteArray(destType,
+                                                       C->getType())) {
       // Long _BitInt has array of bytes as in-memory type.
       // So, split constant into individual bytes.
       ConstantAggregateBuilder Builder(CGM);
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 96b87a67324ce..e45309a0376c3 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -108,7 +108,7 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
   }
 
   if (T->isBitIntType()) {
-    if (!LLVMTypeLayoutMatchesAST(T, R))
+    if (!typeRequiresSplitIntoByteArray(T, R))
       return llvm::ArrayType::get(CGM.Int8Ty,
                                   Context.getTypeSizeInChars(T).getQuantity());
   }
@@ -124,8 +124,8 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
   return R;
 }
 
-bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy,
-                                            llvm::Type *LLVMTy) {
+bool CodeGenTypes::typeRequiresSplitIntoByteArray(QualType ASTTy,
+                                                  llvm::Type *LLVMTy) {
   CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy);
   CharUnits LLVMSize =
       CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy));
@@ -143,7 +143,7 @@ llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
 
   if (T->isBitIntType()) {
     llvm::Type *R = ConvertType(T);
-    if (!LLVMTypeLayoutMatchesAST(T, R))
+    if (!typeRequiresSplitIntoByteArray(T, R))
       return llvm::Type::getIntNTy(
           getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
   }
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index c9b7a3a36832f..0a431b5507f7e 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -128,13 +128,13 @@ class CodeGenTypes {
   /// memory representation is usually i8 or i32, depending on the target.
   llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
 
-  /// Check that size and abi alignment of given LLVM type matches size and
-  /// alignment of given AST type.
-  bool LLVMTypeLayoutMatchesAST(QualType ASTTy, llvm::Type *LLVMTy);
+  /// Check that size and ABI alignment of given LLVM type matches size and
+  /// alignment of given AST type. If they don't, values of the type need to be
+  /// emitted as byte array.
+  bool typeRequiresSplitIntoByteArray(QualType ASTTy, llvm::Type *LLVMTy);
 
-  /// For AST types with special memory representation and whose AST type
-  /// doesn't match LLVM type (see LLVMTypeLayoutMatchesAST) returns type that
-  /// ought to be used for load and store operations.
+  /// For AST types with special memory representation returns type
+  /// that ought to be used for load and store operations.
   llvm::Type *convertTypeForLoadStore(QualType T, llvm::Type *LLVMTy = nullptr);
 
   /// GetFunctionType - Get the LLVM function type for \arg Info.

>From 4243bc4c95c1410570ddce084b56bc362d60e504 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Fri, 21 Jun 2024 06:44:44 -0700
Subject: [PATCH 08/21] Remove additional check in EmitToMemory

---
 clang/lib/CodeGen/CGCall.cpp         |  3 ++-
 clang/lib/CodeGen/CGDecl.cpp         | 12 ++++++++++++
 clang/lib/CodeGen/CGExpr.cpp         |  3 +--
 clang/lib/CodeGen/CGExprConstant.cpp |  3 +--
 clang/lib/CodeGen/CodeGenTypes.cpp   | 10 ++++++----
 clang/lib/CodeGen/CodeGenTypes.h     |  3 ++-
 6 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a071b16fc37f9..876c341d2e4d0 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3860,7 +3860,8 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
       LValue ArgVal =
           LValue::MakeAddr(ArgAddr, RetTy, getContext(), BaseInfo, TBAAInfo);
       EmitStoreOfScalar(
-          Builder.CreateLoad(ReturnValue), ArgVal, /*isInit*/ true);
+          EmitLoadOfScalar(MakeAddrLValue(ReturnValue, RetTy), EndLoc), ArgVal,
+          /*isInit*/ true);
       break;
     }
     }
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 4a213990d1e36..a769a6c58c6b4 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -33,6 +33,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Sema/Sema.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -1969,6 +1970,17 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
       constant = constWithPadding(CGM, IsPattern::No,
                                   replaceUndef(CGM, isPattern, constant));
     }
+
+    if (D.getType()->isBitIntType() &&
+        CGM.getTypes().typeRequiresSplitIntoByteArray(D.getType())) {
+      // Constants for long _BitInt types are split into individual bytes.
+      // Try to fold these back into an integer constant so it can be stored
+      // properly.
+      llvm::Type *LoadType = CGM.getTypes().convertTypeForLoadStore(
+          D.getType(), constant->getType());
+      constant = llvm::ConstantFoldLoadFromConst(
+          constant, LoadType, llvm::APInt::getZero(32), CGM.getDataLayout());
+    }
   }
 
   if (!constant) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 564bef441f80c..48e8facf83758 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2018,8 +2018,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
 /// by ConvertType) to its load/store type (as returned by
 /// convertTypeForLoadStore).
 llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
-  if (hasBooleanRepresentation(Ty) ||
-      (Ty->isBitIntType() && Value->getType()->isIntegerTy())) {
+  if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
     llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
     bool Signed = Ty->isSignedIntegerOrEnumerationType();
     return Builder.CreateIntCast(Value, StoreTy, Signed, "storedv");
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 13f8eb2cafcef..daca4c987b2e6 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1787,8 +1787,7 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
   }
 
   if (destType->isBitIntType()) {
-    if (!CGM.getTypes().typeRequiresSplitIntoByteArray(destType,
-                                                       C->getType())) {
+    if (CGM.getTypes().typeRequiresSplitIntoByteArray(destType, C->getType())) {
       // Long _BitInt has array of bytes as in-memory type.
       // So, split constant into individual bytes.
       ConstantAggregateBuilder Builder(CGM);
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index e45309a0376c3..ebad51278121e 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -108,7 +108,7 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
   }
 
   if (T->isBitIntType()) {
-    if (!typeRequiresSplitIntoByteArray(T, R))
+    if (typeRequiresSplitIntoByteArray(T, R))
       return llvm::ArrayType::get(CGM.Int8Ty,
                                   Context.getTypeSizeInChars(T).getQuantity());
   }
@@ -126,10 +126,13 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
 
 bool CodeGenTypes::typeRequiresSplitIntoByteArray(QualType ASTTy,
                                                   llvm::Type *LLVMTy) {
+  if (!LLVMTy)
+    LLVMTy = ConvertType(ASTTy);
+
   CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy);
   CharUnits LLVMSize =
       CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy));
-  return ASTSize == LLVMSize;
+  return ASTSize != LLVMSize;
 }
 
 llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
@@ -142,8 +145,7 @@ llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
                                   (unsigned)Context.getTypeSize(T));
 
   if (T->isBitIntType()) {
-    llvm::Type *R = ConvertType(T);
-    if (!typeRequiresSplitIntoByteArray(T, R))
+    if (typeRequiresSplitIntoByteArray(T))
       return llvm::Type::getIntNTy(
           getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
   }
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 0a431b5507f7e..56037fe6a82b6 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -131,7 +131,8 @@ class CodeGenTypes {
   /// Check that size and ABI alignment of given LLVM type matches size and
   /// alignment of given AST type. If they don't, values of the type need to be
   /// emitted as byte array.
-  bool typeRequiresSplitIntoByteArray(QualType ASTTy, llvm::Type *LLVMTy);
+  bool typeRequiresSplitIntoByteArray(QualType ASTTy,
+                                      llvm::Type *LLVMTy = nullptr);
 
   /// For AST types with special memory representation returns type
   /// that ought to be used for load and store operations.

>From 934c65936daa212de24b8336b6515b4609e87c89 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 9 Jul 2024 05:38:47 -0700
Subject: [PATCH 09/21] Extend the constant to load/store type before split

---
 clang/lib/CodeGen/CGExprConstant.cpp | 9 ++++++++-
 clang/test/CodeGen/ext-int.c         | 4 ++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index daca4c987b2e6..b6cf39a7044da 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1792,11 +1792,18 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
       // So, split constant into individual bytes.
       ConstantAggregateBuilder Builder(CGM);
       llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
+      llvm::Type *LoadStoreTy =
+          CGM.getTypes().convertTypeForLoadStore(destType);
       // LLVM type doesn't match AST type only for big enough _BitInts, these
       // types don't appear in constant expressions involving ptrtoint, so it
       // is safe to expect a constant int here.
       auto *CI = cast<llvm::ConstantInt>(C);
-      llvm::APInt Value = CI->getValue();
+      llvm::Constant *Res = llvm::ConstantFoldCastOperand(
+          destType->isSignedIntegerOrEnumerationType()
+              ? llvm::Instruction::SExt
+              : llvm::Instruction::ZExt,
+          CI, LoadStoreTy, CGM.getDataLayout());
+      llvm::APInt Value = cast<llvm::ConstantInt>(Res)->getValue();
       Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
       return Builder.build(DesiredTy, /*AllowOversized*/ false);
     }
diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c
index 223742cc3bc64..77a7b94723b4d 100644
--- a/clang/test/CodeGen/ext-int.c
+++ b/clang/test/CodeGen/ext-int.c
@@ -17,7 +17,7 @@ unsigned _BitInt(1) GlobSize1 = 0;
 // CHECK: @GlobSize1 = {{.*}}global i1 false
 
 // CHECK64: @__const.foo.A = private unnamed_addr constant { i17, [4 x i8], <{ i8, [23 x i8] }> } { i17 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8
-// CHECK64: @BigGlob = {{.*}}global <{ i8, i8, [38 x i8] }> <{ i8 -68, i8 2, [38 x i8] zeroinitializer }>, align 8
+// @BigGlob = global [40 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 8
 // CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8
 
 void GenericTest(_BitInt(3) a, unsigned _BitInt(3) b, _BitInt(4) c) {
@@ -155,7 +155,7 @@ char *f2(char *p) {
   return p + sizeof(_BitInt(129));
 }
 
-auto BigGlob = (_BitInt(257))700;
+auto BigGlob = (_BitInt(257))-1;
 // CHECK64: define {{.*}}void @foobar(ptr {{.*}} sret([40 x i8]) align 8 %[[RET1:.+]])
 _BitInt(257) foobar() {
   // CHECK64: %A = alloca [40 x i8], align 8

>From c59d3e6c232953fb6eea74fc957a565b633a9328 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 9 Jul 2024 06:01:35 -0700
Subject: [PATCH 10/21] Address remaining comments

---
 clang/lib/CodeGen/CGExprConstant.cpp |  6 ++----
 clang/lib/CodeGen/CodeGenTypes.cpp   | 10 ++++------
 clang/lib/CodeGen/CodeGenTypes.h     |  9 ++++++---
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index b6cf39a7044da..65697df0cc53d 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -613,8 +613,8 @@ bool ConstStructBuilder::AppendBitField(
     const FieldDecl *Field, uint64_t FieldOffset, llvm::Constant *C,
     bool AllowOverwrite) {
 
-  llvm::ConstantInt *CI = nullptr;
-  if (!isa<llvm::ConstantInt>(C)) {
+  llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(C);
+  if (!CI) {
     // Constants for long _BitInt types are split into individual bytes.
     // Try to fold these back into an integer constant. If that doesn't work
     // out, then we are trying to initialize a bitfield with a non-trivial
@@ -626,8 +626,6 @@ bool ConstStructBuilder::AppendBitField(
     CI = dyn_cast_if_present<llvm::ConstantInt>(FoldedConstant);
     if (!CI)
       return false;
-  } else {
-    CI = cast<llvm::ConstantInt>(C);
   }
 
   const CGRecordLayout &RL =
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index ebad51278121e..01ff99c46ba0c 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -140,19 +140,17 @@ llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
   if (!LLVMTy)
     LLVMTy = ConvertType(T);
 
-  if (!T->isBitIntType() && LLVMTy->isIntegerTy(1))
-    return llvm::IntegerType::get(getLLVMContext(),
-                                  (unsigned)Context.getTypeSize(T));
-
   if (T->isBitIntType()) {
     if (typeRequiresSplitIntoByteArray(T))
       return llvm::Type::getIntNTy(
           getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
+  } else if (LLVMTy->isIntegerTy(1)) {
+    return llvm::IntegerType::get(getLLVMContext(),
+                                  (unsigned)Context.getTypeSize(T));
   }
 
-  if (T->isExtVectorBoolType()) {
+  if (T->isExtVectorBoolType())
     return ConvertTypeForMem(T);
-  }
 
   return LLVMTy;
 }
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 56037fe6a82b6..3b9e30e5f268c 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -128,9 +128,12 @@ class CodeGenTypes {
   /// memory representation is usually i8 or i32, depending on the target.
   llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
 
-  /// Check that size and ABI alignment of given LLVM type matches size and
-  /// alignment of given AST type. If they don't, values of the type need to be
-  /// emitted as byte array.
+  /// Check whether the given type needs to be laid out in memory
+  /// using an opaque byte-array type because its load/store type
+  /// does not have the correct alloc size in the LLVM data layout.
+  /// If this is false, the load/store type (convertTypeForLoadStore)
+  /// and memory representation type (ConvertTypeForMem) will
+  /// be the same type.
   bool typeRequiresSplitIntoByteArray(QualType ASTTy,
                                       llvm::Type *LLVMTy = nullptr);
 

>From 6ff743626b93073360b37725b69a4a9e3f038287 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 9 Jul 2024 06:04:56 -0700
Subject: [PATCH 11/21] Add comment for convertTypeForLoadStore

---
 clang/lib/CodeGen/CodeGenTypes.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 3b9e30e5f268c..14bf97eb92f41 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -128,12 +128,18 @@ class CodeGenTypes {
   /// memory representation is usually i8 or i32, depending on the target.
   llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
 
-  /// Check whether the given type needs to be laid out in memory
-  /// using an opaque byte-array type because its load/store type
-  /// does not have the correct alloc size in the LLVM data layout.
-  /// If this is false, the load/store type (convertTypeForLoadStore)
-  /// and memory representation type (ConvertTypeForMem) will
-  /// be the same type.
+  /// Given that T is a scalar type, return the IR type that should
+  /// be used for load and store operations.  For example, this might
+  /// be i8 for _Bool or i96 for _BitInt(65).  The store size of the
+  /// load/store type (as reported by LLVM's data layout) is always
+  /// the same as the alloc size of the memory representation type
+  /// returned by ConvertTypeForMem.
+  ///
+  /// As an optimization, if you already know the scalar value type
+  /// for T (as would be returned by ConvertType), you can pass
+  /// it as the second argument so that it does not need to be
+  /// recomputed in common cases where the value type and
+  /// load/store type are the same.
   bool typeRequiresSplitIntoByteArray(QualType ASTTy,
                                       llvm::Type *LLVMTy = nullptr);
 

>From 13cdeb8c5f9bc79e0cc997f4041f3490c4007d94 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 9 Jul 2024 06:11:39 -0700
Subject: [PATCH 12/21] Make the comments right

---
 clang/lib/CodeGen/CodeGenTypes.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 14bf97eb92f41..f5cd380527e98 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -128,6 +128,15 @@ class CodeGenTypes {
   /// memory representation is usually i8 or i32, depending on the target.
   llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
 
+  /// Check whether the given type needs to be laid out in memory
+  /// using an opaque byte-array type because its load/store type
+  /// does not have the correct alloc size in the LLVM data layout.
+  /// If this is false, the load/store type (convertTypeForLoadStore)
+  /// and memory representation type (ConvertTypeForMem) will
+  /// be the same type.
+  bool typeRequiresSplitIntoByteArray(QualType ASTTy,
+                                      llvm::Type *LLVMTy = nullptr);
+
   /// Given that T is a scalar type, return the IR type that should
   /// be used for load and store operations.  For example, this might
   /// be i8 for _Bool or i96 for _BitInt(65).  The store size of the
@@ -140,11 +149,6 @@ class CodeGenTypes {
   /// it as the second argument so that it does not need to be
   /// recomputed in common cases where the value type and
   /// load/store type are the same.
-  bool typeRequiresSplitIntoByteArray(QualType ASTTy,
-                                      llvm::Type *LLVMTy = nullptr);
-
-  /// For AST types with special memory representation returns type
-  /// that ought to be used for load and store operations.
   llvm::Type *convertTypeForLoadStore(QualType T, llvm::Type *LLVMTy = nullptr);
 
   /// GetFunctionType - Get the LLVM function type for \arg Info.

>From dade1bbd57793df84d5ddd97b5d2389f41e7ceba Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Tue, 9 Jul 2024 06:21:58 -0700
Subject: [PATCH 13/21] Fix format

---
 clang/lib/CodeGen/CGExprConstant.cpp | 6 +++---
 clang/lib/CodeGen/CGStmt.cpp         | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 65697df0cc53d..60e4508e1fd90 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -609,9 +609,9 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars,
   return Builder.add(InitCst, StartOffset + FieldOffsetInChars, AllowOverwrite);
 }
 
-bool ConstStructBuilder::AppendBitField(
-    const FieldDecl *Field, uint64_t FieldOffset, llvm::Constant *C,
-    bool AllowOverwrite) {
+bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
+                                        uint64_t FieldOffset, llvm::Constant *C,
+                                        bool AllowOverwrite) {
 
   llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(C);
   if (!CI) {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 07da2a767eade..2e65e9fd26099 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1541,10 +1541,11 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
       llvm::Value *Ret = EmitScalarExpr(RV);
       if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
         EmitStoreOfScalar(Ret, MakeAddrLValue(ReturnValue, RV->getType()),
-                  /*isInit*/ true);
+                          /*isInit*/ true);
       else
         Builder.CreateStore(Ret, ReturnValue);
-    } break;
+      break;
+    }
     case TEK_Complex:
       EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()),
                                 /*isInit*/ true);

>From 28e120eafc068e641f7c603d495356d5fe6610e3 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Wed, 10 Jul 2024 07:24:28 -0700
Subject: [PATCH 14/21] Check VecTy first

---
 clang/lib/CodeGen/CGExpr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 11c6ed964fa1e..1c0ed7b8bdea6 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2109,7 +2109,7 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
     auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy);
     if (!CGM.getCodeGenOpts().PreserveVec3Type) {
       // Handle vec3 special.
-      if (!ClangVecTy->isExtVectorBoolType() && VecTy &&
+      if (VecTy && !ClangVecTy->isExtVectorBoolType() &&
           cast<llvm::FixedVectorType>(VecTy)->getNumElements() == 3) {
         // Our source is a vec3, do a shuffle vector to make it a vec4.
         Value = Builder.CreateShuffleVector(Value, ArrayRef<int>{0, 1, 2, -1},

>From d00b81a77106848dfa1190993fb80f3198968a22 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Wed, 10 Jul 2024 10:13:04 -0700
Subject: [PATCH 15/21] Use whole number of bytes to represent _BitInt

---
 clang/lib/CodeGen/CGExprConstant.cpp          | 24 +++---
 clang/lib/CodeGen/CGRecordLayoutBuilder.cpp   |  3 +-
 clang/lib/CodeGen/CodeGenTypes.cpp            | 20 +++--
 clang/lib/CodeGen/CodeGenTypes.h              |  2 +-
 clang/test/CodeGen/aarch64-byval-temp.c       | 40 ++++++----
 clang/test/CodeGen/attr-noundef.cpp           |  9 +--
 clang/test/CodeGen/builtins-bitint.c          | 54 +++++++------
 .../test/CodeGen/builtins-elementwise-math.c  | 78 ++++++++++++-------
 clang/test/CodeGen/builtins-overflow.c        | 16 +++-
 clang/test/CodeGen/ext-int-cc.c               | 26 +++----
 clang/test/CodeGen/ext-int-sanitizer.cpp      | 47 ++++++-----
 clang/test/CodeGen/ext-int.c                  | 27 ++++---
 12 files changed, 196 insertions(+), 150 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 5c3aee07b30c4..55d4c4b5e3354 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -615,7 +615,7 @@ bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
 
   llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(C);
   if (!CI) {
-    // Constants for long _BitInt types are split into individual bytes.
+    // Constants long _BitInt types are sometimes split into individual bytes.
     // Try to fold these back into an integer constant. If that doesn't work
     // out, then we are trying to initialize a bitfield with a non-trivial
     // constant, this must require run-time code.
@@ -1897,26 +1897,24 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
   }
 
   if (destType->isBitIntType()) {
+    ConstantAggregateBuilder Builder(CGM);
+    llvm::Type *LoadStoreTy = CGM.getTypes().convertTypeForLoadStore(destType);
+    // ptrtoint/inttoptr should not involve _BitInt in constant expressions, so
+    // casting to ConstantInt is safe here.
+    auto *CI = cast<llvm::ConstantInt>(C);
+    llvm::Constant *Res = llvm::ConstantFoldCastOperand(
+        destType->isSignedIntegerOrEnumerationType() ? llvm::Instruction::SExt
+                                                     : llvm::Instruction::ZExt,
+        CI, LoadStoreTy, CGM.getDataLayout());
     if (CGM.getTypes().typeRequiresSplitIntoByteArray(destType, C->getType())) {
       // Long _BitInt has array of bytes as in-memory type.
       // So, split constant into individual bytes.
-      ConstantAggregateBuilder Builder(CGM);
       llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType);
-      llvm::Type *LoadStoreTy =
-          CGM.getTypes().convertTypeForLoadStore(destType);
-      // LLVM type doesn't match AST type only for big enough _BitInts, these
-      // types don't appear in constant expressions involving ptrtoint, so it
-      // is safe to expect a constant int here.
-      auto *CI = cast<llvm::ConstantInt>(C);
-      llvm::Constant *Res = llvm::ConstantFoldCastOperand(
-          destType->isSignedIntegerOrEnumerationType()
-              ? llvm::Instruction::SExt
-              : llvm::Instruction::ZExt,
-          CI, LoadStoreTy, CGM.getDataLayout());
       llvm::APInt Value = cast<llvm::ConstantInt>(Res)->getValue();
       Builder.addBits(Value, /*OffsetInBits=*/0, /*AllowOverwrite=*/false);
       return Builder.build(DesiredTy, /*AllowOversized*/ false);
     }
+    return Res;
   }
 
   return C;
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 5169be204c14d..875745d4a48e4 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -427,8 +427,7 @@ CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType,
         continue;
       }
       uint64_t BitOffset = getFieldBitOffset(*Field);
-      llvm::Type *Type =
-          Types.ConvertTypeForMem(Field->getType(), /*ForBitField=*/true);
+      llvm::Type *Type = Types.ConvertTypeForMem(Field->getType());
       // If we don't have a run yet, or don't live within the previous run's
       // allocated storage then we allocate some storage and start a new run.
       if (Run == FieldEnd || BitOffset >= Tail) {
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index c208aff85d8e5..eb21d6125a1c4 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -89,7 +89,7 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
 /// ConvertType in that it is used to convert to the memory representation for
 /// a type.  For example, the scalar representation for _Bool is i1, but the
 /// memory representation is usually i8 or i32, depending on the target.
-llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
+llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
   if (T->isConstantMatrixType()) {
     const Type *Ty = Context.getCanonicalType(T).getTypePtr();
     const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty);
@@ -111,12 +111,11 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) {
     if (typeRequiresSplitIntoByteArray(T, R))
       return llvm::ArrayType::get(CGM.Int8Ty,
                                   Context.getTypeSizeInChars(T).getQuantity());
+    return llvm::IntegerType::get(getLLVMContext(),
+                                  (unsigned)Context.getTypeSize(T));
   }
 
-  // If this is a bool type, or a bit-precise integer type in a bitfield
-  // representation, map this integer to the target-specified size.
-  if ((ForBitField && T->isBitIntType()) ||
-      (!T->isBitIntType() && R->isIntegerTy(1)))
+  if (R->isIntegerTy(1))
     return llvm::IntegerType::get(getLLVMContext(),
                                   (unsigned)Context.getTypeSize(T));
 
@@ -140,14 +139,13 @@ llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T,
   if (!LLVMTy)
     LLVMTy = ConvertType(T);
 
-  if (T->isBitIntType()) {
-    if (typeRequiresSplitIntoByteArray(T))
-      return llvm::Type::getIntNTy(
-          getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
-  } else if (LLVMTy->isIntegerTy(1)) {
+  if (T->isBitIntType())
+    return llvm::Type::getIntNTy(
+        getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8);
+
+  if (LLVMTy->isIntegerTy(1))
     return llvm::IntegerType::get(getLLVMContext(),
                                   (unsigned)Context.getTypeSize(T));
-  }
 
   if (T->isExtVectorBoolType())
     return ConvertTypeForMem(T);
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index f5cd380527e98..cbda2628e9140 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -126,7 +126,7 @@ class CodeGenTypes {
   /// ConvertType in that it is used to convert to the memory representation for
   /// a type.  For example, the scalar representation for _Bool is i1, but the
   /// memory representation is usually i8 or i32, depending on the target.
-  llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false);
+  llvm::Type *ConvertTypeForMem(QualType T);
 
   /// Check whether the given type needs to be laid out in memory
   /// using an opaque byte-array type because its load/store type
diff --git a/clang/test/CodeGen/aarch64-byval-temp.c b/clang/test/CodeGen/aarch64-byval-temp.c
index 0384830c69a41..0ee0312b2362d 100644
--- a/clang/test/CodeGen/aarch64-byval-temp.c
+++ b/clang/test/CodeGen/aarch64-byval-temp.c
@@ -80,33 +80,41 @@ void example_BitInt(void) {
 }
 // CHECK-O0-LABEL: define dso_local void @example_BitInt(
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[L:%.*]] = alloca i129, align 16
-// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
-// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
-// CHECK-O0-NEXT:    store i129 0, ptr [[L]], align 16
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16
-// CHECK-O0-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16
+// CHECK-O0-NEXT:    [[L:%.*]] = alloca i256, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i256, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i256, align 16
+// CHECK-O0-NEXT:    store i256 0, ptr [[L]], align 16
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i256, ptr [[L]], align 16
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i256 [[TMP0]] to i129
+// CHECK-O0-NEXT:    [[STOREDV:%.*]] = sext i129 [[LOADEDV]] to i256
+// CHECK-O0-NEXT:    store i256 [[STOREDV]], ptr [[INDIRECT_ARG_TEMP]], align 16
 // CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]])
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16
-// CHECK-O0-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = load i256, ptr [[L]], align 16
+// CHECK-O0-NEXT:    [[LOADEDV1:%.*]] = trunc i256 [[TMP1]] to i129
+// CHECK-O0-NEXT:    [[STOREDV1:%.*]] = sext i129 [[LOADEDV1]] to i256
+// CHECK-O0-NEXT:    store i256 [[STOREDV1]], ptr [[INDIRECT_ARG_TEMP1]], align 16
 // CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]])
 // CHECK-O0-NEXT:    ret void
 //
 // CHECK-O3-LABEL: define dso_local void @example_BitInt(
 // CHECK-O3-NEXT:  entry:
-// CHECK-O3-NEXT:    [[L:%.*]] = alloca i129, align 16
-// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
-// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[L:%.*]] = alloca i256, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i256, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i256, align 16
 // CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[L]]) 
-// CHECK-O3-NEXT:    store i129 0, ptr [[L]], align 16, !tbaa [[TBAA6:![0-9]+]]
-// CHECK-O3-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    store i256 0, ptr [[L]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-O3-NEXT:    [[TMP0:%.*]] = load i256, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    [[LOADEDV:%.*]] = trunc i256 [[TMP0]] to i129
 // CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) 
-// CHECK-O3-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    [[STOREDV:%.*]] = sext i129 [[LOADEDV]] to i256
+// CHECK-O3-NEXT:    store i256 [[STOREDV]], ptr [[INDIRECT_ARG_TEMP]], align 16, !tbaa [[TBAA6]]
 // CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP]])
 // CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP]]) 
-// CHECK-O3-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    [[TMP1:%.*]] = load i256, ptr [[L]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    [[LOADEDV1:%.*]] = trunc i256 [[TMP1]] to i129
 // CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) 
-// CHECK-O3-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    [[STOREDV1:%.*]] = sext i129 [[LOADEDV1]] to i256
+// CHECK-O3-NEXT:    store i256 [[STOREDV1]], ptr [[INDIRECT_ARG_TEMP1]], align 16, !tbaa [[TBAA6]]
 // CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef [[INDIRECT_ARG_TEMP1]])
 // CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[INDIRECT_ARG_TEMP1]]) 
 // CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[L]]) 
diff --git a/clang/test/CodeGen/attr-noundef.cpp b/clang/test/CodeGen/attr-noundef.cpp
index e1cab091bfcbf..abdf9496bd396 100644
--- a/clang/test/CodeGen/attr-noundef.cpp
+++ b/clang/test/CodeGen/attr-noundef.cpp
@@ -157,11 +157,10 @@ void pass_large_BitInt(_BitInt(127) e) {
 // CHECK: [[DEF]] ptr @{{.*}}ret_npt{{.*}}()
 // CHECK: [[DEF]] void @{{.*}}pass_npt{{.*}}(ptr %
 
-// TODO: for now, ExtInt is only noundef if it is sign/zero-extended
 // CHECK-INTEL: [[DEF]] noundef signext i3 @{{.*}}ret_BitInt{{.*}}()
-// CHECK-AARCH: [[DEF]] i3 @{{.*}}ret_BitInt{{.*}}()
+// CHECK-AARCH: [[DEF]] noundef i3 @{{.*}}ret_BitInt{{.*}}()
 // CHECK-INTEL: [[DEF]] void @{{.*}}pass_BitInt{{.*}}(i3 noundef signext %
-// CHECK-AARCH: [[DEF]] void @{{.*}}pass_BitInt{{.*}}(i3 %
-// CHECK-INTEL: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i64 %{{.*}}, i64 %
-// CHECK-AARCH: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i127 %
+// CHECK-AARCH: [[DEF]] void @{{.*}}pass_BitInt{{.*}}(i3 noundef %
+// CHECK-INTEL: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i64 noundef %{{.*}}, i64 noundef %
+// CHECK-AARCH: [[DEF]] void @{{.*}}pass_large_BitInt{{.*}}(i127 noundef %
 } // namespace check_exotic
diff --git a/clang/test/CodeGen/builtins-bitint.c b/clang/test/CodeGen/builtins-bitint.c
index 804e497128773..207ff388a2876 100644
--- a/clang/test/CodeGen/builtins-bitint.c
+++ b/clang/test/CodeGen/builtins-bitint.c
@@ -8,10 +8,11 @@
 // CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi1(
 // CHECK-O0-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[A:%.*]] = alloca i1, align 1
-// CHECK-O0-NEXT:    store i1 true, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i1, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i1 @llvm.ctpop.i1(i1 [[TMP0]])
+// CHECK-O0-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK-O0-NEXT:    store i8 1, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i1 @llvm.ctpop.i1(i1 [[LOADEDV]])
 // CHECK-O0-NEXT:    [[CAST:%.*]] = zext i1 [[TMP1]] to i32
 // CHECK-O0-NEXT:    ret i32 [[CAST]]
 //
@@ -28,10 +29,11 @@ int test_popcountg_ubi1() {
 // CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi2(
 // CHECK-O0-SAME: ) #[[ATTR0]] {
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[A:%.*]] = alloca i2, align 1
-// CHECK-O0-NEXT:    store i2 -1, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i2, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i2 @llvm.ctpop.i2(i2 [[TMP0]])
+// CHECK-O0-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK-O0-NEXT:    store i8 3, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i2
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i2 @llvm.ctpop.i2(i2 [[LOADEDV]])
 // CHECK-O0-NEXT:    [[CAST:%.*]] = zext i2 [[TMP1]] to i32
 // CHECK-O0-NEXT:    ret i32 [[CAST]]
 //
@@ -48,10 +50,11 @@ int test_popcountg_ubi2() {
 // CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi1(
 // CHECK-O0-SAME: ) #[[ATTR0]] {
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[A:%.*]] = alloca i1, align 1
-// CHECK-O0-NEXT:    store i1 false, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i1, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i1 @llvm.cttz.i1(i1 [[TMP0]], i1 false)
+// CHECK-O0-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK-O0-NEXT:    store i8 0, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i1 @llvm.cttz.i1(i1 [[LOADEDV]], i1 false)
 // CHECK-O0-NEXT:    [[CAST:%.*]] = zext i1 [[TMP1]] to i32
 // CHECK-O0-NEXT:    ret i32 [[CAST]]
 //
@@ -68,10 +71,11 @@ int test_ctzg_ubi1() {
 // CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi2(
 // CHECK-O0-SAME: ) #[[ATTR0]] {
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[A:%.*]] = alloca i2, align 1
-// CHECK-O0-NEXT:    store i2 0, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i2, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i2 @llvm.cttz.i2(i2 [[TMP0]], i1 false)
+// CHECK-O0-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK-O0-NEXT:    store i8 0, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i2
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i2 @llvm.cttz.i2(i2 [[LOADEDV]], i1 false)
 // CHECK-O0-NEXT:    [[CAST:%.*]] = zext i2 [[TMP1]] to i32
 // CHECK-O0-NEXT:    ret i32 [[CAST]]
 //
@@ -88,10 +92,11 @@ int test_ctzg_ubi2() {
 // CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi1(
 // CHECK-O0-SAME: ) #[[ATTR0]] {
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[A:%.*]] = alloca i1, align 1
-// CHECK-O0-NEXT:    store i1 false, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i1, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i1 @llvm.ctlz.i1(i1 [[TMP0]], i1 false)
+// CHECK-O0-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK-O0-NEXT:    store i8 0, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i1 @llvm.ctlz.i1(i1 [[LOADEDV]], i1 false)
 // CHECK-O0-NEXT:    [[CAST:%.*]] = zext i1 [[TMP1]] to i32
 // CHECK-O0-NEXT:    ret i32 [[CAST]]
 //
@@ -108,10 +113,11 @@ int test_clzg_ubi1() {
 // CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi2(
 // CHECK-O0-SAME: ) #[[ATTR0]] {
 // CHECK-O0-NEXT:  entry:
-// CHECK-O0-NEXT:    [[A:%.*]] = alloca i2, align 1
-// CHECK-O0-NEXT:    store i2 0, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i2, ptr [[A]], align 1
-// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i2 @llvm.ctlz.i2(i2 [[TMP0]], i1 false)
+// CHECK-O0-NEXT:    [[A:%.*]] = alloca i8, align 1
+// CHECK-O0-NEXT:    store i8 0, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-O0-NEXT:    [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i2
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = call i2 @llvm.ctlz.i2(i2 [[LOADEDV]], i1 false)
 // CHECK-O0-NEXT:    [[CAST:%.*]] = zext i2 [[TMP1]] to i32
 // CHECK-O0-NEXT:    ret i32 [[CAST]]
 //
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index b52a11cca1990..8fb52992c0fe6 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -44,8 +44,9 @@ void test_builtin_elementwise_abs(float f1, float f2, double d1, double d2,
   const si8 cvi2 = vi2;
   vi2 = __builtin_elementwise_abs(cvi2);
 
-  // CHECK:      [[BI1:%.+]] = load i31, ptr %bi1.addr, align 4
-  // CHECK-NEXT: call i31 @llvm.abs.i31(i31 [[BI1]], i1 false)
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: call i31 @llvm.abs.i31(i31 [[LOADEDV]], i1 false)
   bi2 = __builtin_elementwise_abs(bi1);
 
   // CHECK:      [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
@@ -92,14 +93,18 @@ void test_builtin_elementwise_add_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
   vu1 = __builtin_elementwise_add_sat(vu1, vu2);
 
-  // CHECK:      [[BI1:%.+]] = load i31, ptr %bi1.addr, align 4
-  // CHECK-NEXT: [[BI2:%.+]] = load i31, ptr %bi2.addr, align 4
-  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[BI1]], i31 [[BI2]])
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
+  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
+  // CHECK-NEXT: call i31 @llvm.sadd.sat.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
   bi1 = __builtin_elementwise_add_sat(bi1, bi2);
 
-  // CHECK:      [[BU1:%.+]] = load i55, ptr %bu1.addr, align 8
-  // CHECK-NEXT: [[BU2:%.+]] = load i55, ptr %bu2.addr, align 8
-  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[BU1]], i55 [[BU2]])
+  // CHECK:      [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
+  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
+  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
+  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
+  // CHECK-NEXT: call i55 @llvm.uadd.sat.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
   bu1 = __builtin_elementwise_add_sat(bu1, bu2);
 
   // CHECK:      [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
@@ -141,14 +146,18 @@ void test_builtin_elementwise_sub_sat(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
   vu1 = __builtin_elementwise_sub_sat(vu1, vu2);
 
-  // CHECK:      [[BI1:%.+]] = load i31, ptr %bi1.addr, align 4
-  // CHECK-NEXT: [[BI2:%.+]] = load i31, ptr %bi2.addr, align 4
-  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[BI1]], i31 [[BI2]])
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
+  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
+  // CHECK-NEXT: call i31 @llvm.ssub.sat.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
   bi1 = __builtin_elementwise_sub_sat(bi1, bi2);
 
-  // CHECK:      [[BU1:%.+]] = load i55, ptr %bu1.addr, align 8
-  // CHECK-NEXT: [[BU2:%.+]] = load i55, ptr %bu2.addr, align 8
-  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[BU1]], i55 [[BU2]])
+  // CHECK:      [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
+  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
+  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
+  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
+  // CHECK-NEXT: call i55 @llvm.usub.sat.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
   bu1 = __builtin_elementwise_sub_sat(bu1, bu2);
 
   // CHECK:      [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
@@ -169,7 +178,7 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
   // CHECK-LABEL: define void @test_builtin_elementwise_max(
   // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
   // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
-  // CHECK-NEXT:  call float @llvm.maxnum.f32(float %0, float %1)
+  // CHECK-NEXT:  call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
   f1 = __builtin_elementwise_max(f1, f2);
 
   // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
@@ -210,14 +219,18 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
   vu1 = __builtin_elementwise_max(vu1, vu2);
 
-  // CHECK:      [[BI1:%.+]] = load i31, ptr %bi1.addr, align 4
-  // CHECK-NEXT: [[BI2:%.+]] = load i31, ptr %bi2.addr, align 4
-  // CHECK-NEXT: call i31 @llvm.smax.i31(i31 [[BI1]], i31 [[BI2]])
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
+  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
+  // CHECK-NEXT: call i31 @llvm.smax.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
   bi1 = __builtin_elementwise_max(bi1, bi2);
 
-  // CHECK:      [[BU1:%.+]] = load i55, ptr %bu1.addr, align 8
-  // CHECK-NEXT: [[BU2:%.+]] = load i55, ptr %bu2.addr, align 8
-  // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[BU1]], i55 [[BU2]])
+  // CHECK:      [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
+  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
+  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
+  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
+  // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
   bu1 = __builtin_elementwise_max(bu1, bu2);
 
   // CHECK:      [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
@@ -249,7 +262,7 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
   // CHECK-LABEL: define void @test_builtin_elementwise_min(
   // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
   // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4
-  // CHECK-NEXT:  call float @llvm.minnum.f32(float %0, float %1)
+  // CHECK-NEXT:  call float @llvm.minnum.f32(float [[F1]], float [[F2]])
   f1 = __builtin_elementwise_min(f1, f2);
 
   // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
@@ -296,14 +309,18 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
   // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
   vu1 = __builtin_elementwise_min(vu1, vu2);
 
-  // CHECK:      [[BI1:%.+]] = load i31, ptr %bi1.addr, align 4
-  // CHECK-NEXT: [[BI2:%.+]] = load i31, ptr %bi2.addr, align 4
-  // CHECK-NEXT: call i31 @llvm.smin.i31(i31 [[BI1]], i31 [[BI2]])
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4
+  // CHECK-NEXT: [[LOADEDV1:%.+]] = trunc i32 [[BI2]] to i31
+  // CHECK-NEXT: call i31 @llvm.smin.i31(i31 [[LOADEDV]], i31 [[LOADEDV1]])
   bi1 = __builtin_elementwise_min(bi1, bi2);
 
-  // CHECK:      [[BU1:%.+]] = load i55, ptr %bu1.addr, align 8
-  // CHECK-NEXT: [[BU2:%.+]] = load i55, ptr %bu2.addr, align 8
-  // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[BU1]], i55 [[BU2]])
+  // CHECK:      [[BU1:%.+]] = load i64, ptr %bu1.addr, align 8
+  // CHECK-NEXT: [[LOADEDV2:%.+]] = trunc i64 [[BU1]] to i55
+  // CHECK-NEXT: [[BU2:%.+]] = load i64, ptr %bu2.addr, align 8
+  // CHECK-NEXT: [[LOADEDV3:%.+]] = trunc i64 [[BU2]] to i55
+  // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]])
   bu1 = __builtin_elementwise_min(bu1, bu2);
 
   // CHECK:      [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16
@@ -341,8 +358,9 @@ void test_builtin_elementwise_bitreverse(si8 vi1, si8 vi2,
   const si8 cvi2 = vi2;
   vi2 = __builtin_elementwise_bitreverse(cvi2);
 
-  // CHECK:      [[BI1:%.+]] = load i31, ptr %bi1.addr, align 4
-  // CHECK-NEXT: call i31 @llvm.bitreverse.i31(i31 [[BI1]])
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: call i31 @llvm.bitreverse.i31(i31 [[LOADEDV]])
   bi2 = __builtin_elementwise_bitreverse(bi1);
 
   // CHECK:      [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
diff --git a/clang/test/CodeGen/builtins-overflow.c b/clang/test/CodeGen/builtins-overflow.c
index 4babc05759dc8..7c524723f76e8 100644
--- a/clang/test/CodeGen/builtins-overflow.c
+++ b/clang/test/CodeGen/builtins-overflow.c
@@ -42,11 +42,13 @@ int test_add_overflow_int_int_int(int x, int y) {
 
 int test_add_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y) {
   // CHECK-LABEL: define {{(dso_local )?}}i32 @test_add_overflow_xint31_xint31_xint31({{.+}})
+  // CHECK: %loadedv = trunc i32 %{{.*}} to i31
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i31, i1 } @llvm.sadd.with.overflow.i31(i31 %{{.+}}, i31 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i31, i1 } [[S]], 1
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i31, i1 } [[S]], 0
-  // CHECK: store i31 [[Q]], ptr
+  // CHECK: [[STOREDV:%.+]] = sext i31 [[Q]] to i32
+  // CHECK: store i32 [[STOREDV]], ptr
   // CHECK: br i1 [[C]]
   _BitInt(31) r;
   if (__builtin_add_overflow(x, y, &r))
@@ -84,11 +86,13 @@ int test_sub_overflow_int_int_int(int x, int y) {
 
 int test_sub_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y) {
   // CHECK-LABEL: define {{(dso_local )?}}i32 @test_sub_overflow_xint31_xint31_xint31({{.+}})
+  // CHECK: %loadedv = trunc i32 %{{.*}} to i31
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i31, i1 } @llvm.ssub.with.overflow.i31(i31 %{{.+}}, i31 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i31, i1 } [[S]], 1
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i31, i1 } [[S]], 0
-  // CHECK: store i31 [[Q]], ptr
+  // CHECK: [[STOREDV:%.+]] = sext i31 [[Q]] to i32
+  // CHECK: store i32 [[STOREDV]], ptr
   // CHECK: br i1 [[C]]
   _BitInt(31) r;
   if (__builtin_sub_overflow(x, y, &r))
@@ -171,11 +175,13 @@ int test_mul_overflow_int_int_int(int x, int y) {
 
 int test_mul_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y) {
   // CHECK-LABEL: define {{(dso_local )?}}i32 @test_mul_overflow_xint31_xint31_xint31({{.+}})
+  // CHECK: %loadedv = trunc i32 %{{.*}} to i31
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i31, i1 } @llvm.smul.with.overflow.i31(i31 %{{.+}}, i31 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i31, i1 } [[S]], 1
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i31, i1 } [[S]], 0
-  // CHECK: store i31 [[Q]], ptr
+  // CHECK: [[STOREDV:%.+]] = sext i31 [[Q]] to i32
+  // CHECK: store i32 [[STOREDV]], ptr
   // CHECK: br i1 [[C]]
   _BitInt(31) r;
   if (__builtin_mul_overflow(x, y, &r))
@@ -185,11 +191,13 @@ int test_mul_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y) {
 
 int test_mul_overflow_xint127_xint127_xint127(_BitInt(127) x, _BitInt(127) y) {
   // CHECK-LABEL: define {{(dso_local )?}}i32 @test_mul_overflow_xint127_xint127_xint127({{.+}})
+  // CHECK: %loadedv = trunc i128 %{{.*}} to i127
   // CHECK-NOT: ext
   // CHECK: [[S:%.+]] = call { i127, i1 } @llvm.smul.with.overflow.i127(i127 %{{.+}}, i127 %{{.+}})
   // CHECK-DAG: [[C:%.+]] = extractvalue { i127, i1 } [[S]], 1
   // CHECK-DAG: [[Q:%.+]] = extractvalue { i127, i1 } [[S]], 0
-  // CHECK: store i127 [[Q]], ptr
+  // CHECK: [[STOREDV:%.+]] = sext i127 [[Q]] to i128
+  // CHECK: store i128 [[STOREDV]], ptr
   // CHECK: br i1 [[C]]
   _BitInt(127) r;
   if (__builtin_mul_overflow(x, y, &r))
diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c
index a2293c5095cdb..05b2bf1bec81e 100644
--- a/clang/test/CodeGen/ext-int-cc.c
+++ b/clang/test/CodeGen/ext-int-cc.c
@@ -67,29 +67,29 @@ void ParamPassing2(_BitInt(127) b, _BitInt(63) c) {}
 // WIN64: define dso_local void @ParamPassing2(ptr %{{.+}}, i63 %{{.+}})
 // LIN32: define{{.*}} void @ParamPassing2(ptr %{{.+}}, i63 %{{.+}})
 // WIN32: define dso_local void @ParamPassing2(ptr %{{.+}}, i63 %{{.+}})
-// NACL: define{{.*}} void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
+// NACL: define{{.*}} void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
 // NVPTX64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}})
-// NVPTX: define{{.*}} void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
+// NVPTX: define{{.*}} void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
 // SPARCV9: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}})
-// SPARC: define{{.*}} void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
+// SPARC: define{{.*}} void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
 // MIPS64: define{{.*}} void @ParamPassing2(i127 signext  %{{.+}}, i63 signext %{{.+}})
-// MIPS: define{{.*}} void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 signext %{{.+}})
-// SPIR64: define{{.*}} spir_func void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
-// SPIR: define{{.*}} spir_func void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
-// HEX: define{{.*}} void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
-// LANAI: define{{.*}} void @ParamPassing2(ptr byval(i127) align 4 %{{.+}}, i63 %{{.+}})
-// R600: define{{.*}} void @ParamPassing2(ptr addrspace(5) byval(i127) align 8 %{{.+}}, i63 %{{.+}})
-// ARC: define{{.*}} void @ParamPassing2(ptr byval(i127) align 4 %{{.+}}, i63 inreg %{{.+}})
-// XCORE: define{{.*}} void @ParamPassing2(ptr byval(i127) align 4 %{{.+}}, i63 %{{.+}})
+// MIPS: define{{.*}} void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 signext %{{.+}})
+// SPIR64: define{{.*}} spir_func void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
+// SPIR: define{{.*}} spir_func void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
+// HEX: define{{.*}} void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
+// LANAI: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 %{{.+}})
+// R600: define{{.*}} void @ParamPassing2(ptr addrspace(5) byval(i128) align 8 %{{.+}}, i63 %{{.+}})
+// ARC: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 inreg %{{.+}})
+// XCORE: define{{.*}} void @ParamPassing2(ptr byval(i128) align 4 %{{.+}}, i63 %{{.+}})
 // RISCV64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}})
 // RISCV32: define{{.*}} void @ParamPassing2(ptr %{{.+}}, i63 %{{.+}})
 // WASM: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}})
 // SYSTEMZ: define{{.*}} void @ParamPassing2(ptr %{{.+}}, i63 signext %{{.+}})
 // PPC64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}})
-// PPC32: define{{.*}} void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
+// PPC32: define{{.*}} void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
 // AARCH64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}})
 // AARCH64DARWIN: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 %{{.+}})
-// ARM: define{{.*}} arm_aapcscc void @ParamPassing2(ptr byval(i127) align 8 %{{.+}}, i63 %{{.+}})
+// ARM: define{{.*}} arm_aapcscc void @ParamPassing2(ptr byval(i128) align 8 %{{.+}}, i63 %{{.+}})
 // LA64: define{{.*}} void @ParamPassing2(i127 %{{.+}}, i63 signext %{{.+}})
 // LA32: define{{.*}} void @ParamPassing2(ptr %{{.+}}, i63 %{{.+}})
 
diff --git a/clang/test/CodeGen/ext-int-sanitizer.cpp b/clang/test/CodeGen/ext-int-sanitizer.cpp
index 85ae26c72f45f..dde8ff9888028 100644
--- a/clang/test/CodeGen/ext-int-sanitizer.cpp
+++ b/clang/test/CodeGen/ext-int-sanitizer.cpp
@@ -55,12 +55,15 @@ void FloatOverflow(float f, double d) {
 void UIntTruncation(unsigned _BitInt(35) E, unsigned int i, unsigned long long ll) {
 
   i = E;
-  // CHECK: %[[LOADE:.+]] = load i35
-  // CHECK: store i35 %[[LOADE]], ptr %[[EADDR:.+]]
-  // CHECK: %[[LOADE2:.+]] = load i35, ptr %[[EADDR]]
-  // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE2]] to i32
+  // CHECK: %[[LOADE:.+]] = load i64
+  // CHECK: %[[E1:.+]] = trunc i64 %[[LOADE]] to i35
+  // CHECK: %[[STOREDV:.+]] = zext i35 %[[E1]] to i64
+  // CHECK: store i64 %[[STOREDV]], ptr %[[EADDR:.+]]
+  // CHECK: %[[LOADE2:.+]] = load i64, ptr %[[EADDR]]
+  // CHECK: %[[LOADEDV:.+]] = trunc i64 %[[LOADE2]] to i35
+  // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADEDV]] to i32
   // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35
-  // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE2]]
+  // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADEDV]]
   // CHECK: br i1 %[[CHECK]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
 
@@ -77,43 +80,49 @@ void UIntTruncation(unsigned _BitInt(35) E, unsigned int i, unsigned long long l
 void IntTruncation(_BitInt(35) E, unsigned _BitInt(42) UE, int i, unsigned j) {
 
   j = E;
-  // CHECK: %[[LOADE:.+]] = load i35
-  // CHECK: store i35 %[[LOADE]], ptr %[[EADDR:.+]]
-  // CHECK: %[[LOADE2:.+]] = load i35, ptr %[[EADDR]]
-  // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE2]] to i32
+  // CHECK: %[[LOADE:.+]] = load i64
+  // CHECK: %[[E1:.+]] = trunc i64 %[[LOADE]] to i35
+  // CHECK: %[[STOREDV:.+]] = sext i35 %[[E1]] to i64
+  // CHECK: store i64 %[[STOREDV]], ptr %[[EADDR:.+]]
+  // CHECK: %[[LOADE2:.+]] = load i64, ptr %[[EADDR]]
+  // CHECK: %[[LOADEDV:.+]] = trunc i64 %[[LOADE2]] to i35
+  // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADEDV]] to i32
   // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35
-  // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE2]]
+  // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADEDV]]
   // CHECK: br i1 %[[CHECK]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
 
   j = UE;
-  // CHECK: %[[LOADUE:.+]] = load i42
-  // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32
+  // CHECK: %[[LOADUE:.+]] = load i64
+  // CHECK: %[[LOADEDV:.+]] = trunc i64 %[[LOADUE]] to i42
+  // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADEDV]] to i32
   // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i42
-  // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]]
+  // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADEDV]]
   // CHECK: br i1 %[[CHECK]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
 
   // Note: also triggers sign change check.
   i = UE;
-  // CHECK: %[[LOADUE:.+]] = load i42
-  // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32
+  // CHECK: %[[LOADUE:.+]] = load i64
+  // CHECK: %[[LOADEDV:.+]] = trunc i64 %[[LOADUE]] to i42
+  // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADEDV]] to i32
   // CHECK: %[[NEG:.+]] = icmp slt i32 %[[CONV]], 0
   // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]]
   // CHECK: %[[EXT:.+]] = sext i32 %[[CONV]] to i42
-  // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]]
+  // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADEDV]]
   // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]]
   // CHECK: br i1 %[[CHECKBOTH]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
 
   // Note: also triggers sign change check.
   E = UE;
-  // CHECK: %[[LOADUE:.+]] = load i42
-  // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i35
+  // CHECK: %[[LOADUE:.+]] = load i64
+  // CHECK: %[[LOADEDV:.+]] = trunc i64 %[[LOADUE]] to i42
+  // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADEDV]] to i35
   // CHECK: %[[NEG:.+]] = icmp slt i35 %[[CONV]], 0
   // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]]
   // CHECK: %[[EXT:.+]] = sext i35 %[[CONV]] to i42
-  // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]]
+  // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADEDV]]
   // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]]
   // CHECK: br i1 %[[CHECKBOTH]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c
index 77a7b94723b4d..a841daff72e08 100644
--- a/clang/test/CodeGen/ext-int.c
+++ b/clang/test/CodeGen/ext-int.c
@@ -3,9 +3,9 @@
 // RUN: %clang_cc1 -std=c23 -triple i386-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN32
 // RUN: %clang_cc1 -std=c23 -triple i386-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
 
-// CHECK64: %struct.S1 = type { i17, [4 x i8], [24 x i8] }
-// WIN32: %struct.S1 = type { i17, [4 x i8], [24 x i8] }
-// LIN32: %struct.S1 = type { i17, [20 x i8] }
+// CHECK64: %struct.S1 = type { i32, [4 x i8], [24 x i8] }
+// WIN32: %struct.S1 = type { i32, [4 x i8], [24 x i8] }
+// LIN32: %struct.S1 = type { i32, [20 x i8] }
 // CHECK64: %struct.S2 = type { [40 x i8], i32, [4 x i8] }
 // WIN32: %struct.S2 = type { [40 x i8], i32, [4 x i8] }
 // LIN32: %struct.S2 = type { [36 x i8], i32 }
@@ -14,9 +14,9 @@
 
 //GH62207
 unsigned _BitInt(1) GlobSize1 = 0;
-// CHECK: @GlobSize1 = {{.*}}global i1 false
+// CHECK: @GlobSize1 = {{.*}}global i8 0
 
-// CHECK64: @__const.foo.A = private unnamed_addr constant { i17, [4 x i8], <{ i8, [23 x i8] }> } { i17 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8
+// CHECK64: @__const.foo.A = private unnamed_addr constant { i32, [4 x i8], <{ i8, [23 x i8] }> } { i32 1, [4 x i8] undef, <{ i8, [23 x i8] }> <{ i8 -86, [23 x i8] zeroinitializer }> }, align 8
 // @BigGlob = global [40 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 8
 // CHECK64: @f.p = internal global <{ i8, i8, [22 x i8] }> <{ i8 16, i8 39, [22 x i8] zeroinitializer }>, align 8
 
@@ -65,14 +65,17 @@ void OffsetOfTest(void) {
 
 void Size1ExtIntParam(unsigned _BitInt(1) A) {
   // CHECK: define {{.*}}void @Size1ExtIntParam(i1{{.*}}  %[[PARAM:.+]])
-  // CHECK: %[[PARAM_ADDR:.+]] = alloca i1
-  // CHECK: %[[B:.+]] = alloca [5 x i1]
-  // CHECK: store i1 %[[PARAM]], ptr %[[PARAM_ADDR]]
+  // CHECK: %[[PARAM_ADDR:.+]] = alloca i8
+  // CHECK: %[[B:.+]] = alloca [5 x i8]
+  // CHECK: %[[STOREDV:.+]] = zext i1 %[[PARAM]] to i8
+  // CHECK: store i8 %[[STOREDV]], ptr %[[PARAM_ADDR]]
   unsigned _BitInt(1) B[5];
 
-  // CHECK: %[[PARAM_LOAD:.+]] = load i1, ptr %[[PARAM_ADDR]]
-  // CHECK: %[[IDX:.+]] = getelementptr inbounds [5 x i1], ptr %[[B]]
-  // CHECK: store i1 %[[PARAM_LOAD]], ptr %[[IDX]]
+  // CHECK: %[[PARAM_LOAD:.+]] = load i8, ptr %[[PARAM_ADDR]]
+  // CHECK: %[[LOADEDV:.+]] = trunc i8 %[[PARAM_LOAD]] to i1
+  // CHECK: %[[IDX:.+]] = getelementptr inbounds [5 x i8], ptr %[[B]]
+  // CHECK: %[[STOREDV1:.+]] = zext i1 %[[LOADEDV]] to i8
+  // CHECK: store i8 %[[STOREDV1]], ptr %[[IDX]]
   B[2] = A;
 }
 
@@ -84,7 +87,7 @@ struct S1 {
 
 int foo(int a) {
   // CHECK: %A1 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 0
-  // CHECK: store i17 1, ptr %A1
+  // CHECK: store i32 1, ptr %A1
   // CHECK64: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
   // WIN32: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 2
   // LIN32: %B2 = getelementptr inbounds %struct.S1, ptr %B, i32 0, i32 1

>From fcd8be017e1e596553edfc4e37bd66efdcb73984 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Thu, 11 Jul 2024 08:38:25 -0700
Subject: [PATCH 16/21] Fix remaining tests

---
 clang/test/CodeGen/ext-int-sanitizer.cpp | 119 ++++++++++++++---------
 clang/test/CodeGen/extend-arg-64.c       |   3 +-
 clang/test/CodeGen/ubsan-shift-bitint.c  |  12 ++-
 clang/test/CodeGenCXX/ext-int.cpp        |  58 +++++++----
 4 files changed, 119 insertions(+), 73 deletions(-)

diff --git a/clang/test/CodeGen/ext-int-sanitizer.cpp b/clang/test/CodeGen/ext-int-sanitizer.cpp
index dde8ff9888028..f7c6db7236290 100644
--- a/clang/test/CodeGen/ext-int-sanitizer.cpp
+++ b/clang/test/CodeGen/ext-int-sanitizer.cpp
@@ -131,19 +131,24 @@ void IntTruncation(_BitInt(35) E, unsigned _BitInt(42) UE, int i, unsigned j) {
 // CHECK: define{{.*}} void @_Z15SignChangeCheckDU39_DB39_
 void SignChangeCheck(unsigned _BitInt(39) UE, _BitInt(39) E) {
   UE = E;
-  // CHECK: %[[LOADEU:.+]] = load i39
-  // CHECK: %[[LOADE:.+]] = load i39
-  // CHECK: store i39 %[[LOADE]], ptr %[[EADDR:.+]]
-  // CHECK: %[[LOADE2:.+]] = load i39, ptr %[[EADDR]]
-  // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADE2]], 0
+  // CHECK: %[[LOADEU:.+]] = load i64
+  // CHECK: %[[LOADE:.+]] = load i64
+  // CHECK: %[[LOADEDV:.+]] = trunc i64 %[[LOADE]] to i39
+  // CHECK: %[[STOREDV:.+]] = sext i39 %[[LOADEDV]] to i64
+  // CHECK: store i64 %[[STOREDV]], ptr %[[EADDR:.+]]
+  // CHECK: %[[LOADE2:.+]] = load i64, ptr %[[EADDR]]
+  // CHECK: %[[LOADEDV2:.+]] = trunc i64 %[[LOADE2]] to i39
+  // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADEDV2]], 0
   // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 %[[NEG]], false
   // CHECK: br i1 %[[SIGNCHECK]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
 
   E = UE;
-  // CHECK: store i39 %[[LOADE2]], ptr %[[UEADDR:.+]]
-  // CHECK: %[[LOADUE2:.+]] = load i39, ptr %[[UEADDR]]
-  // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADUE2]], 0
+  // CHECK: %[[STOREDV2:.+]] = zext i39 %[[LOADEDV2]] to i64
+  // CHECK: store i64 %[[STOREDV2]], ptr %[[UEADDR:.+]]
+  // CHECK: %[[LOADUE2:.+]] = load i64, ptr %[[UEADDR]]
+  // CHECK: %[[LOADEDV3:.+]] = trunc i64 %[[LOADUE2]] to i39
+  // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADEDV3]], 0
   // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]]
   // CHECK: br i1 %[[SIGNCHECK]]
   // CHECK: call void @__ubsan_handle_implicit_conversion_abort
@@ -154,12 +159,14 @@ void DivByZero(_BitInt(11) E, int i) {
 
   // Also triggers signed integer overflow.
   E / E;
-  // CHECK: %[[EADDR:.+]] = alloca i11
-  // CHECK: %[[E:.+]] = load i11, ptr %[[EADDR]]
-  // CHECK: %[[E2:.+]] = load i11, ptr %[[EADDR]]
-  // CHECK: %[[NEZERO:.+]] = icmp ne i11 %[[E2]], 0
-  // CHECK: %[[NEMIN:.+]] = icmp ne i11 %[[E]], -1024
-  // CHECK: %[[NENEG1:.+]] = icmp ne i11 %[[E2]], -1
+  // CHECK: %[[EADDR:.+]] = alloca i16
+  // CHECK: %[[E:.+]] = load i16, ptr %[[EADDR]]
+  // CHECK: %[[LOADEDE:.+]] = trunc i16 %[[E]] to i11
+  // CHECK: %[[E2:.+]] = load i16, ptr %[[EADDR]]
+  // CHECK: %[[LOADEDE2:.+]] = trunc i16 %[[E2]] to i11
+  // CHECK: %[[NEZERO:.+]] = icmp ne i11 %[[LOADEDE2]], 0
+  // CHECK: %[[NEMIN:.+]] = icmp ne i11 %[[LOADEDE]], -1024
+  // CHECK: %[[NENEG1:.+]] = icmp ne i11 %[[LOADEDE2]], -1
   // CHECK: %[[OR:.+]] = or i1 %[[NEMIN]], %[[NENEG1]]
   // CHECK: %[[AND:.+]] = and i1 %[[NEZERO]], %[[OR]]
   // CHECK: br i1 %[[AND]]
@@ -171,20 +178,23 @@ void DivByZero(_BitInt(11) E, int i) {
 // CHECK: define{{.*}} void @_Z6ShiftsDB9_
 void Shifts(_BitInt(9) E) {
   E >> E;
-  // CHECK: %[[EADDR:.+]] = alloca i9
-  // CHECK: %[[LHSE:.+]] = load i9, ptr %[[EADDR]]
-  // CHECK: %[[RHSE:.+]] = load i9, ptr %[[EADDR]]
-  // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8
+  // CHECK: %[[EADDR:.+]] = alloca i16
+  // CHECK: %[[LHSE:.+]] = load i16, ptr %[[EADDR]]
+  // CHECK: %[[RHSE:.+]] = load i16, ptr %[[EADDR]]
+  // CHECK: %[[LOADED:.+]] = trunc i16 %[[RHSE]] to i9
+  // CHECK: %[[CMP:.+]] = icmp ule i9 %[[LOADED]], 8
   // CHECK: br i1 %[[CMP]]
   // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort
 
   E << E;
-  // CHECK: %[[LHSE:.+]] = load i9, ptr
-  // CHECK: %[[RHSE:.+]] = load i9, ptr
-  // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8
+  // CHECK: %[[LHSE:.+]] = load i16, ptr
+  // CHECK: %[[LOADEDL:.+]] = trunc i16 %[[LHSE]] to i9
+  // CHECK: %[[RHSE:.+]] = load i16, ptr
+  // CHECK: %[[LOADED:.+]] = trunc i16 %[[RHSE]] to i9
+  // CHECK: %[[CMP:.+]] = icmp ule i9 %[[LOADED]], 8
   // CHECK: br i1 %[[CMP]]
-  // CHECK: %[[ZEROS:.+]] = sub nuw nsw i9 8, %[[RHSE]]
-  // CHECK: %[[CHECK:.+]] = lshr i9 %[[LHSE]], %[[ZEROS]]
+  // CHECK: %[[ZEROS:.+]] = sub nuw nsw i9 8, %[[LOADED]]
+  // CHECK: %[[CHECK:.+]] = lshr i9 %[[LOADEDL]], %[[ZEROS]]
   // CHECK: %[[SKIPSIGN:.+]] = lshr i9 %[[CHECK]], 1
   // CHECK: %[[CHECK:.+]] = icmp eq i9 %[[SKIPSIGN]]
   // CHECK: %[[PHI:.+]] = phi i1 [ true, %{{.+}} ], [ %[[CHECK]], %{{.+}} ]
@@ -197,11 +207,15 @@ void SignedIntegerOverflow(_BitInt(93) BiggestE,
                            _BitInt(4) SmallestE,
                            _BitInt(31) JustRightE) {
   BiggestE + BiggestE;
-  // CHECK: %[[LOADBIGGESTE2:.+]] = load i93
-  // CHECK: store i93 %[[LOADBIGGESTE2]], ptr %[[BIGGESTEADDR:.+]]
-  // CHECK: %[[LOAD1:.+]] = load i93, ptr %[[BIGGESTEADDR]]
-  // CHECK: %[[LOAD2:.+]] = load i93, ptr %[[BIGGESTEADDR]]
-  // CHECK: %[[OFCALL:.+]] = call { i93, i1 } @llvm.sadd.with.overflow.i93(i93 %[[LOAD1]], i93 %[[LOAD2]])
+  // CHECK: %[[LOADBIGGESTE2:.+]] = load i128
+  // CHECK: %[[LOADEDV:.+]] = trunc i128 %[[LOADBIGGESTE2]] to i93
+  // CHECK: %[[STOREDV:.+]] = sext i93 %[[LOADEDV]] to i128
+  // CHECK: store i128 %[[STOREDV]], ptr %[[BIGGESTEADDR:.+]]
+  // CHECK: %[[LOAD1:.+]] = load i128, ptr %[[BIGGESTEADDR]]
+  // CHECK: %[[LOADEDV1:.+]] = trunc i128 %[[LOAD1]] to i93
+  // CHECK: %[[LOAD2:.+]] = load i128, ptr %[[BIGGESTEADDR]]
+  // CHECK: %[[LOADEDV2:.+]] = trunc i128 %[[LOAD2]] to i93
+  // CHECK: %[[OFCALL:.+]] = call { i93, i1 } @llvm.sadd.with.overflow.i93(i93 %[[LOADEDV1]], i93 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
@@ -209,9 +223,11 @@ void SignedIntegerOverflow(_BitInt(93) BiggestE,
   // CHECK: call void @__ubsan_handle_add_overflow_abort
 
   SmallestE - SmallestE;
-  // CHECK: %[[LOAD1:.+]] = load i4, ptr
-  // CHECK: %[[LOAD2:.+]] = load i4, ptr
-  // CHECK: %[[OFCALL:.+]] = call { i4, i1 } @llvm.ssub.with.overflow.i4(i4 %[[LOAD1]], i4 %[[LOAD2]])
+  // CHECK: %[[LOAD1:.+]] = load i8, ptr
+  // CHECK: %[[LOADEDV1:.+]] = trunc i8 %[[LOAD1]] to i4
+  // CHECK: %[[LOAD2:.+]] = load i8, ptr
+  // CHECK: %[[LOADEDV2:.+]] = trunc i8 %[[LOAD2]] to i4
+  // CHECK: %[[OFCALL:.+]] = call { i4, i1 } @llvm.ssub.with.overflow.i4(i4 %[[LOADEDV1]], i4 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
@@ -219,9 +235,11 @@ void SignedIntegerOverflow(_BitInt(93) BiggestE,
   // CHECK: call void @__ubsan_handle_sub_overflow_abort
 
   JustRightE * JustRightE;
-  // CHECK: %[[LOAD1:.+]] = load i31, ptr
-  // CHECK: %[[LOAD2:.+]] = load i31, ptr
-  // CHECK: %[[OFCALL:.+]] = call { i31, i1 } @llvm.smul.with.overflow.i31(i31 %[[LOAD1]], i31 %[[LOAD2]])
+  // CHECK: %[[LOAD1:.+]] = load i32, ptr
+  // CHECK: %[[LOADEDV1:.+]] = trunc i32 %[[LOAD1]] to i31
+  // CHECK: %[[LOAD2:.+]] = load i32, ptr
+  // CHECK: %[[LOADEDV2:.+]] = trunc i32 %[[LOAD2]] to i31
+  // CHECK: %[[OFCALL:.+]] = call { i31, i1 } @llvm.smul.with.overflow.i31(i31 %[[LOADEDV1]], i31 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
@@ -234,10 +252,11 @@ void UnsignedIntegerOverflow(unsigned u,
                              unsigned _BitInt(23) SmallE,
                              unsigned _BitInt(35) BigE) {
   u = SmallE + SmallE;
-  // CHECK: %[[BIGGESTEADDR:.+]] = alloca i23
-  // CHECK: %[[LOADE1:.+]] = load i23, ptr %[[BIGGESTEADDR]]
-  // CHECK: %[[LOADE2:.+]] = load i23, ptr %[[BIGGESTEADDR]]
-  // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]])
+  // CHECK: %[[LOADE1:.+]] = load i32, ptr
+  // CHECK-NEXT: %[[LOADEDV1:.+]] = trunc i32 %[[LOADE1]] to i23
+  // CHECK: %[[LOADE2:.+]] = load i32, ptr
+  // CHECK-NEXT: %[[LOADEDV2:.+]] = trunc i32 %[[LOADE2]] to i23
+  // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADEDV1]], i23 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
@@ -255,9 +274,11 @@ void UnsignedIntegerOverflow(unsigned u,
   // CHECK: call void @__ubsan_handle_add_overflow_abort
 
   SmallE = SmallE + SmallE;
-  // CHECK: %[[LOADE1:.+]] = load i23, ptr
-  // CHECK: %[[LOADE2:.+]] = load i23, ptr
-  // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]])
+  // CHECK: %[[LOADE1:.+]] = load i32, ptr
+  // CHECK-NEXT: %[[LOADEDV1:.+]] = trunc i32 %[[LOADE1]] to i23
+  // CHECK: %[[LOADE2:.+]] = load i32, ptr
+  // CHECK-NEXT: %[[LOADEDV2:.+]] = trunc i32 %[[LOADE2]] to i23
+  // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADEDV1]], i23 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
@@ -265,9 +286,11 @@ void UnsignedIntegerOverflow(unsigned u,
   // CHECK: call void @__ubsan_handle_add_overflow_abort
 
   SmallE = BigE + BigE;
-  // CHECK: %[[LOADE1:.+]] = load i35, ptr
-  // CHECK: %[[LOADE2:.+]] = load i35, ptr
-  // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]])
+  // CHECK: %[[LOADE1:.+]] = load i64, ptr
+  // CHECK-NEXT: %[[LOADEDV1:.+]] = trunc i64 %[[LOADE1]] to i35
+  // CHECK: %[[LOADE2:.+]] = load i64, ptr
+  // CHECK-NEXT: %[[LOADEDV2:.+]] = trunc i64 %[[LOADE2]] to i35
+  // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADEDV1]], i35 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
@@ -275,9 +298,11 @@ void UnsignedIntegerOverflow(unsigned u,
   // CHECK: call void @__ubsan_handle_add_overflow_abort
 
   BigE = BigE + BigE;
-  // CHECK: %[[LOADE1:.+]] = load i35, ptr
-  // CHECK: %[[LOADE2:.+]] = load i35, ptr
-  // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]])
+  // CHECK: %[[LOADE1:.+]] = load i64, ptr
+  // CHECK-NEXT: %[[LOADEDV1:.+]] = trunc i64 %[[LOADE1]] to i35
+  // CHECK: %[[LOADE2:.+]] = load i64, ptr
+  // CHECK-NEXT: %[[LOADEDV2:.+]] = trunc i64 %[[LOADE2]] to i35
+  // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADEDV1]], i35 %[[LOADEDV2]])
   // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0
   // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1
   // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true
diff --git a/clang/test/CodeGen/extend-arg-64.c b/clang/test/CodeGen/extend-arg-64.c
index 0749523b9ab3d..2cb56d35af21d 100644
--- a/clang/test/CodeGen/extend-arg-64.c
+++ b/clang/test/CodeGen/extend-arg-64.c
@@ -68,7 +68,8 @@ int test(void) {
   // CHECKEXT-NEXT: call void (i64, ...) @knr
 
   knr(ei23);
-  // CHECKEXT: load i23, ptr @ei23
+  // CHECKEXT: load i32, ptr @ei23
+  // CHECKEXT: trunc i32
   // CHECKEXT-NEXT: call void (i23, ...) @knr
 
   knr(ff);
diff --git a/clang/test/CodeGen/ubsan-shift-bitint.c b/clang/test/CodeGen/ubsan-shift-bitint.c
index af65ed60918b0..9e4ec15060b3f 100644
--- a/clang/test/CodeGen/ubsan-shift-bitint.c
+++ b/clang/test/CodeGen/ubsan-shift-bitint.c
@@ -5,14 +5,16 @@
 
 // CHECK-LABEL: define{{.*}} i32 @test_left_variable
 int test_left_variable(unsigned _BitInt(5) b, unsigned _BitInt(2) e) {
-  // CHECK: [[E_REG:%.+]] = load [[E_SIZE:i2]]
+  // CHECK: load i8
+  // CHECK: [[E_REG:%.+]] = trunc i8 {{.*}} to [[E_SIZE:i2]]
   // CHECK: icmp ule [[E_SIZE]] [[E_REG]], -1,
   return b << e;
 }
 
 // CHECK-LABEL: define{{.*}} i32 @test_right_variable
 int test_right_variable(unsigned _BitInt(2) b, unsigned _BitInt(3) e) {
-  // CHECK: [[E_REG:%.+]] = load [[E_SIZE:i3]]
+  // CHECK: load i8
+  // CHECK: [[E_REG:%.+]] = trunc i8 {{.*}} to [[E_SIZE:i3]]
   // CHECK: icmp ule [[E_SIZE]] [[E_REG]], 1,
   return b >> e;
 }
@@ -37,14 +39,16 @@ int test_right_literal(unsigned _BitInt(2) b) {
 
 // CHECK-LABEL: define{{.*}} i32 @test_signed_left_variable
 int test_signed_left_variable(unsigned _BitInt(15) b, _BitInt(2) e) {
-  // CHECK: [[E_REG:%.+]] = load [[E_SIZE:i2]]
+  // CHECK: load i8
+  // CHECK: [[E_REG:%.+]] = trunc i8 {{.*}} to [[E_SIZE:i2]]
   // CHECK: icmp ule [[E_SIZE]] [[E_REG]], 1,
   return b << e;
 }
 
 // CHECK-LABEL: define{{.*}} i32 @test_signed_right_variable
 int test_signed_right_variable(unsigned _BitInt(32) b, _BitInt(4) e) {
-  // CHECK: [[E_REG:%.+]] = load [[E_SIZE:i4]]
+  // CHECK: load i8
+  // CHECK: [[E_REG:%.+]] = trunc i8 {{.*}} to [[E_SIZE:i4]]
   // CHECK: icmp ule [[E_SIZE]] [[E_REG]], 7,
   return b >> e;
 }
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index 41fb6274d42c0..e58375ca66996 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -52,20 +52,20 @@ struct HasBitIntFirst {
   _BitInt(35) A;
   int B;
 };
-// CHECK: %struct.HasBitIntFirst = type { i35, i32 }
+// CHECK: %struct.HasBitIntFirst = type { i64, i32 }
 
 struct HasBitIntLast {
   int A;
   _BitInt(35) B;
 };
-// CHECK: %struct.HasBitIntLast = type { i32, i35 }
+// CHECK: %struct.HasBitIntLast = type { i32, i64 }
 
 struct HasBitIntMiddle {
   int A;
   _BitInt(35) B;
   int C;
 };
-// CHECK: %struct.HasBitIntMiddle = type { i32, i35, i32 }
+// CHECK: %struct.HasBitIntMiddle = type { i32, i64, i32 }
 
 // Force emitting of the above structs.
 void StructEmit() {
@@ -170,8 +170,10 @@ void TakesVarargs(int i, ...) {
   // LIN64: %[[FITSINGP:.+]] = icmp ule i32 %[[GPOFFSET]], 32
   // LIN64: br i1 %[[FITSINGP]]
   // LIN64: %[[BC1:.+]] = phi ptr
-  // LIN64: %[[LOAD1:.+]] = load i92, ptr %[[BC1]]
-  // LIN64: store i92 %[[LOAD1]], ptr
+  // LIN64: %[[LOAD1:.+]] = load i128, ptr %[[BC1]]
+  // LIN64: %[[T:.+]] = trunc i128 %[[LOAD1]] to i92
+  // LIN64: %[[S:.+]] = sext i92 %[[T]] to i128
+  // LIN64: store i128 %[[S]], ptr
 
   // LIN32: %[[CUR1:.+]] = load ptr, ptr %[[ARGS]]
   // LIN32: %[[NEXT1:.+]] = getelementptr inbounds i8, ptr %[[CUR1]], i32 12
@@ -185,14 +187,18 @@ void TakesVarargs(int i, ...) {
   // WIN64: %[[NEXT1:.+]] = getelementptr inbounds i8, ptr %[[CUR1]], i64 8
   // WIN64: store ptr %[[NEXT1]], ptr %[[ARGS]]
   // WIN64: %[[LOADP1:.+]] = load ptr, ptr %[[CUR1]]
-  // WIN64: %[[LOADV1:.+]] = load i92, ptr %[[LOADP1]]
-  // WIN64: store i92 %[[LOADV1]], ptr
+  // WIN64: %[[LOADV1:.+]] = load i128, ptr %[[LOADP1]]
+  // WIN64: %[[TR:.+]] = trunc i128 %[[LOADV1]] to i92
+  // WIN64: %[[SEXT:.+]] = sext i92 %[[TR]] to i128
+  // WIN64: store i128 %[[SEXT]], ptr
 
   // WIN32: %[[CUR1:.+]] = load ptr, ptr %[[ARGS]]
   // WIN32: %[[NEXT1:.+]] = getelementptr inbounds i8, ptr %[[CUR1]], i32 16
   // WIN32: store ptr %[[NEXT1]], ptr %[[ARGS]]
-  // WIN32: %[[LOADV1:.+]] = load i92, ptr %[[CUR1]]
-  // WIN32: store i92 %[[LOADV1]], ptr
+  // WIN32: %[[LOADV1:.+]] = load i128, ptr %[[CUR1]]
+  // WIN32: %[[TR:.+]] = trunc i128 %[[LOADV1]] to i92
+  // WIN32: %[[SEXT:.+]] = sext i92 %[[TR]] to i128
+  // WIN32: store i128 %[[SEXT]], ptr
 
 
   _BitInt(31) B = __builtin_va_arg(args, _BitInt(31));
@@ -202,26 +208,34 @@ void TakesVarargs(int i, ...) {
   // LIN64: %[[FITSINGP:.+]] = icmp ule i32 %[[GPOFFSET]], 40
   // LIN64: br i1 %[[FITSINGP]]
   // LIN64: %[[BC1:.+]] = phi ptr
-  // LIN64: %[[LOAD1:.+]] = load i31, ptr %[[BC1]]
-  // LIN64: store i31 %[[LOAD1]], ptr
+  // LIN64: %[[LOAD1:.+]] = load i32, ptr %[[BC1]]
+  // LIN64: %[[T:.+]] = trunc i32 %[[LOAD1]] to i31
+  // LIN64: %[[S:.+]] = sext i31 %[[T]] to i32
+  // LIN64: store i32 %[[S]], ptr
 
   // LIN32: %[[CUR2:.+]] = load ptr, ptr %[[ARGS]]
   // LIN32: %[[NEXT2:.+]] = getelementptr inbounds i8, ptr %[[CUR2]], i32 4
   // LIN32: store ptr %[[NEXT2]], ptr %[[ARGS]]
-  // LIN32: %[[LOADV2:.+]] = load i31, ptr %[[CUR2]]
-  // LIN32: store i31 %[[LOADV2]], ptr
+  // LIN32: %[[LOADV2:.+]] = load i32, ptr %[[CUR2]]
+  // LIN32: %[[T:.+]] = trunc i32 %[[LOADV2]] to i31
+  // LIN32: %[[S:.+]] = sext i31 %[[T]] to i32
+  // LIN32: store i32 %[[S]], ptr
 
   // WIN64: %[[CUR2:.+]] = load ptr, ptr %[[ARGS]]
   // WIN64: %[[NEXT2:.+]] = getelementptr inbounds i8, ptr %[[CUR2]], i64 8
   // WIN64: store ptr %[[NEXT2]], ptr %[[ARGS]]
-  // WIN64: %[[LOADV2:.+]] = load i31, ptr %[[CUR2]]
-  // WIN64: store i31 %[[LOADV2]], ptr
+  // WIN64: %[[LOADV2:.+]] = load i32, ptr %[[CUR2]]
+  // WIN64: %[[T:.+]] = trunc i32 %[[LOADV2]] to i31
+  // WIN64: %[[S:.+]] = sext i31 %[[T]] to i32
+  // WIN64: store i32 %[[S]], ptr
 
   // WIN32: %[[CUR2:.+]] = load ptr, ptr %[[ARGS]]
   // WIN32: %[[NEXT2:.+]] = getelementptr inbounds i8, ptr %[[CUR2]], i32 4
   // WIN32: store ptr %[[NEXT2]], ptr %[[ARGS]]
-  // WIN32: %[[LOADV2:.+]] = load i31, ptr %[[CUR2]]
-  // WIN32: store i31 %[[LOADV2]], ptr
+  // WIN32: %[[LOADV2:.+]] = load i32, ptr %[[CUR2]]
+  // WIN32: %[[T:.+]] = trunc i32 %[[LOADV2]] to i31
+  // WIN32: %[[S:.+]] = sext i31 %[[T]] to i32
+  // WIN32: store i32 %[[S]], ptr
 
   _BitInt(16) C = __builtin_va_arg(args, _BitInt(16));
   // LIN64: %[[AD3:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %[[ARGS]]
@@ -299,7 +313,7 @@ void TakesVarargs(int i, ...) {
   // WIN: store ptr %[[NEXT5]], ptr %[[ARGS]]
   // WIN64: %[[LOADP5:.+]] = load ptr, ptr %[[CUR5]]
   // WIN64: %[[LOADV5:.+]] = load <8 x i32>, ptr %[[LOADP5]]
-  // WIN32: %[[LOADV5:.+]] = load <8 x i32>, ptr %argp.cur7
+  // WIN32: %[[LOADV5:.+]] = load <8 x i32>, ptr %argp.cur9
   // WIN: store <8 x i32> %[[LOADV5]], ptr
 
   __builtin_va_end(args);
@@ -505,11 +519,13 @@ void Shift(_BitInt(28) Ext, _BitInt(65) LargeExt, int i) {
   // CHECK: ashr i32 {{.+}}, %[[PROMO]]
 
   Ext << i;
+  // CHECK: %[[BI:.+]] = trunc i32 %{{.+}} to i28
   // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28
-  // CHECK: shl i28 {{.+}}, %[[PROMO]]
+  // CHECK: shl i28 %[[BI]], %[[PROMO]]
   Ext >> i;
+  // CHECK: %[[BI:.+]] = trunc i32 %{{.+}} to i28
   // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28
-  // CHECK: ashr i28 {{.+}}, %[[PROMO]]
+  // CHECK: ashr i28 %[[BI]], %[[PROMO]]
 
   LargeExt << i;
   // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65
@@ -579,7 +595,7 @@ void TBAATest(_BitInt(sizeof(int) * 8) ExtInt,
               _BitInt(6) Other) {
   // CHECK-DAG: store i32 %{{.+}}, ptr %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA:.+]]
   // CHECK-DAG: store i32 %{{.+}}, ptr %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA]]
-  // CHECK-DAG: store i6 %{{.+}}, ptr %{{.+}}, align 1, !tbaa ![[EXTINT6_TBAA:.+]]
+  // CHECK-DAG: store i8 %{{.+}}, ptr %{{.+}}, align 1, !tbaa ![[EXTINT6_TBAA:.+]]
   ExtInt = 5;
   ExtUInt = 5;
   Other = 5;

>From f43ac1c130d6f72d0338ae8ebd7e6d709feeefc4 Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva at intel.com>
Date: Thu, 11 Jul 2024 17:47:26 +0200
Subject: [PATCH 17/21] Update clang/lib/CodeGen/CodeGenTypes.cpp

Co-authored-by: John McCall <rjmccall at gmail.com>
---
 clang/lib/CodeGen/CodeGenTypes.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index eb21d6125a1c4..938ba589c8f83 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -89,6 +89,13 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD,
 /// ConvertType in that it is used to convert to the memory representation for
 /// a type.  For example, the scalar representation for _Bool is i1, but the
 /// memory representation is usually i8 or i32, depending on the target.
+///
+/// We generally assume that the alloc size of this type under the LLVM
+/// data layout is the same as the size of the AST type.  The alignment
+/// does not have to match: Clang should always use explicit alignments
+/// and packed structs as necessary to produce the layout it needs.
+/// But the size does need to be exactly right or else things like struct
+/// layout will break.
 llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
   if (T->isConstantMatrixType()) {
     const Type *Ty = Context.getCanonicalType(T).getTypePtr();

>From 0ec1be5ddff89648eeb0302f33324dfc8f13b3ba Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Thu, 11 Jul 2024 08:48:53 -0700
Subject: [PATCH 18/21] Add the comment.

---
 clang/lib/CodeGen/CodeGenTypes.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index eb21d6125a1c4..085d1d0d61e9b 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -107,6 +107,19 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
     return llvm::IntegerType::get(FixedVT->getContext(), BytePadded);
   }
 
+  // If T is _Bool or a _BitInt type, ConvertType will produce an IR type
+  // with the exact semantic bit-width of the AST type; for example,
+  // _BitInt(17) will turn into i17. In memory, however, we need to store
+  // such values extended to their full storage size as decided by AST
+  // layout; this is an ABI requirement. Ideally, we would always use an
+  // integer type that's just the bit-size of the AST type; for example, if
+  // sizeof(_BitInt(17)) == 4, _BitInt(17) would turn into i32. That is what's
+  // returned by convertTypeForLoadStore. However, that type does not
+  // always satisfy the size requirement on memory representation types
+  // described above. For example, a 32-bit platform might reasonably set
+  // sizeof(_BitInt(65)) == 12, but i96 is likely to have an alloc size
+  // of 16 bytes in the LLVM data layout. In these cases, we simply return
+  // a byte array of the appropriate size.
   if (T->isBitIntType()) {
     if (typeRequiresSplitIntoByteArray(T, R))
       return llvm::ArrayType::get(CGM.Int8Ty,

>From c00be8b62d7c2e34e7113fab3a0a65583988b4d6 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Thu, 11 Jul 2024 10:43:01 -0700
Subject: [PATCH 19/21] Attempt to fix HIP test

---
 clang/test/CodeGenHIP/printf_nonhostcall.cpp | 76 ++++++++++----------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/clang/test/CodeGenHIP/printf_nonhostcall.cpp b/clang/test/CodeGenHIP/printf_nonhostcall.cpp
index 34904819ae072..2c6d0ecac1e8a 100644
--- a/clang/test/CodeGenHIP/printf_nonhostcall.cpp
+++ b/clang/test/CodeGenHIP/printf_nonhostcall.cpp
@@ -267,8 +267,10 @@ __device__ _BitInt(128) Int128 = 45637;
 // CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @f2 to ptr), align 8
 // CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr addrspacecast (ptr addrspace(1) @f3 to ptr), align 2
 // CHECK-NEXT:    [[TMP6:%.*]] = load bfloat, ptr addrspacecast (ptr addrspace(1) @f4 to ptr), align 2
-// CHECK-NEXT:    [[TMP7:%.*]] = load i55, ptr addrspacecast (ptr addrspace(1) @Int55 to ptr), align 8
-// CHECK-NEXT:    [[TMP8:%.*]] = load i44, ptr addrspacecast (ptr addrspace(1) @Int44 to ptr), align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @Int55 to ptr), align 8
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i64 [[TMP7]] to i55
+// CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @Int44 to ptr), align 8
+// CHECK-NEXT:    [[LOADEDV2:%.*]] = trunc i64 [[TMP8]] to i44
 // CHECK-NEXT:    [[TMP9:%.*]] = load i128, ptr addrspacecast (ptr addrspace(1) @Int128 to ptr), align 8
 // CHECK-NEXT:    [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 108)
 // CHECK-NEXT:    [[TMP10:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
@@ -286,30 +288,30 @@ __device__ _BitInt(128) Int128 = 45637;
 // CHECK-NEXT:    store i64 [[TMP14]], ptr addrspace(1) [[TMP13]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i32 8
 // CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @_ZZ4foo3vE1s to ptr), ptr addrspace(1) [[PRINTBUFFNEXTPTR]], align 8
-// CHECK-NEXT:    [[PRINTBUFFNEXTPTR2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
+// CHECK-NEXT:    [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
 // CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[CONV]] to i64
-// CHECK-NEXT:    store i64 [[TMP15]], ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], align 8
-// CHECK-NEXT:    [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], i32 8
-// CHECK-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], align 8
+// CHECK-NEXT:    store i64 [[TMP15]], ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], i32 8
-// CHECK-NEXT:    store double [[CONV1]], ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], align 8
+// CHECK-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], i32 8
-// CHECK-NEXT:    store double [[TMP4]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 8
+// CHECK-NEXT:    store double [[CONV1]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], i32 8
-// CHECK-NEXT:    [[TMP16:%.*]] = fpext half [[TMP5]] to double
-// CHECK-NEXT:    store double [[TMP16]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
+// CHECK-NEXT:    store double [[TMP4]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 8
-// CHECK-NEXT:    [[TMP17:%.*]] = fpext bfloat [[TMP6]] to double
-// CHECK-NEXT:    store double [[TMP17]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
+// CHECK-NEXT:    [[TMP16:%.*]] = fpext half [[TMP5]] to double
+// CHECK-NEXT:    store double [[TMP16]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], i32 8
-// CHECK-NEXT:    [[TMP18:%.*]] = zext i55 [[TMP7]] to i64
-// CHECK-NEXT:    store i64 [[TMP18]], ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], align 8
+// CHECK-NEXT:    [[TMP17:%.*]] = fpext bfloat [[TMP6]] to double
+// CHECK-NEXT:    store double [[TMP17]], ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], i32 8
-// CHECK-NEXT:    [[TMP19:%.*]] = zext i44 [[TMP8]] to i64
-// CHECK-NEXT:    store i64 [[TMP19]], ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], align 8
+// CHECK-NEXT:    [[TMP18:%.*]] = zext i55 [[LOADEDV]] to i64
+// CHECK-NEXT:    store i64 [[TMP18]], ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], align 8
 // CHECK-NEXT:    [[PRINTBUFFNEXTPTR10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], i32 8
-// CHECK-NEXT:    store i128 [[TMP9]], ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], align 8
-// CHECK-NEXT:    [[PRINTBUFFNEXTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], i32 16
+// CHECK-NEXT:    [[TMP19:%.*]] = zext i44 [[LOADEDV2]] to i64
+// CHECK-NEXT:    store i64 [[TMP19]], ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], align 8
+// CHECK-NEXT:    [[PRINTBUFFNEXTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], i32 8
+// CHECK-NEXT:    store i128 [[TMP9]], ptr addrspace(1) [[PRINTBUFFNEXTPTR11]], align 8
+// CHECK-NEXT:    [[PRINTBUFFNEXTPTR12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR11]], i32 16
 // CHECK-NEXT:    br label [[END_BLOCK]]
 //
 // CHECK_CONSTRAINED-LABEL: define dso_local noundef i32 @_Z4foo3v
@@ -326,8 +328,10 @@ __device__ _BitInt(128) Int128 = 45637;
 // CHECK_CONSTRAINED-NEXT:    [[TMP4:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @f2 to ptr), align 8
 // CHECK_CONSTRAINED-NEXT:    [[TMP5:%.*]] = load half, ptr addrspacecast (ptr addrspace(1) @f3 to ptr), align 2
 // CHECK_CONSTRAINED-NEXT:    [[TMP6:%.*]] = load bfloat, ptr addrspacecast (ptr addrspace(1) @f4 to ptr), align 2
-// CHECK_CONSTRAINED-NEXT:    [[TMP7:%.*]] = load i55, ptr addrspacecast (ptr addrspace(1) @Int55 to ptr), align 8
-// CHECK_CONSTRAINED-NEXT:    [[TMP8:%.*]] = load i44, ptr addrspacecast (ptr addrspace(1) @Int44 to ptr), align 8
+// CHECK_CONSTRAINED-NEXT:    [[TMP7:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @Int55 to ptr), align 8
+// CHECK_CONSTRAINED-NEXT:    [[LOADEDV:%.*]] = trunc i64 [[TMP7]] to i55
+// CHECK_CONSTRAINED-NEXT:    [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @Int44 to ptr), align 8
+// CHECK_CONSTRAINED-NEXT:    [[LOADEDV2:%.*]] = trunc i64 [[TMP8]] to i44
 // CHECK_CONSTRAINED-NEXT:    [[TMP9:%.*]] = load i128, ptr addrspacecast (ptr addrspace(1) @Int128 to ptr), align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 108)
 // CHECK_CONSTRAINED-NEXT:    [[TMP10:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
@@ -345,30 +349,30 @@ __device__ _BitInt(128) Int128 = 45637;
 // CHECK_CONSTRAINED-NEXT:    store i64 [[TMP14]], ptr addrspace(1) [[TMP13]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i32 8
 // CHECK_CONSTRAINED-NEXT:    store ptr addrspacecast (ptr addrspace(3) @_ZZ4foo3vE1s to ptr), ptr addrspace(1) [[PRINTBUFFNEXTPTR]], align 8
-// CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
+// CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR]], i32 8
 // CHECK_CONSTRAINED-NEXT:    [[TMP15:%.*]] = zext i32 [[CONV]] to i64
-// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP15]], ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], align 8
-// CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR2]], i32 8
-// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], align 8
+// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP15]], ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR3]], i32 8
-// CHECK_CONSTRAINED-NEXT:    store double [[CONV1]], ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], align 8
+// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP2]], ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR4]], i32 8
-// CHECK_CONSTRAINED-NEXT:    store double [[TMP4]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 8
+// CHECK_CONSTRAINED-NEXT:    store double [[CONV1]], ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR5]], i32 8
-// CHECK_CONSTRAINED-NEXT:    [[TMP16:%.*]] = fpext half [[TMP5]] to double
-// CHECK_CONSTRAINED-NEXT:    store double [[TMP16]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
+// CHECK_CONSTRAINED-NEXT:    store double [[TMP4]], ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR6]], i32 8
-// CHECK_CONSTRAINED-NEXT:    [[TMP17:%.*]] = fpext bfloat [[TMP6]] to double
-// CHECK_CONSTRAINED-NEXT:    store double [[TMP17]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
+// CHECK_CONSTRAINED-NEXT:    [[TMP16:%.*]] = fpext half [[TMP5]] to double
+// CHECK_CONSTRAINED-NEXT:    store double [[TMP16]], ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR7]], i32 8
-// CHECK_CONSTRAINED-NEXT:    [[TMP18:%.*]] = zext i55 [[TMP7]] to i64
-// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP18]], ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], align 8
+// CHECK_CONSTRAINED-NEXT:    [[TMP17:%.*]] = fpext bfloat [[TMP6]] to double
+// CHECK_CONSTRAINED-NEXT:    store double [[TMP17]], ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR8]], i32 8
-// CHECK_CONSTRAINED-NEXT:    [[TMP19:%.*]] = zext i44 [[TMP8]] to i64
-// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP19]], ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], align 8
+// CHECK_CONSTRAINED-NEXT:    [[TMP18:%.*]] = zext i55 [[LOADEDV]] to i64
+// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP18]], ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], align 8
 // CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR9]], i32 8
-// CHECK_CONSTRAINED-NEXT:    store i128 [[TMP9]], ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], align 8
-// CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], i32 16
+// CHECK_CONSTRAINED-NEXT:    [[TMP19:%.*]] = zext i44 [[LOADEDV2]] to i64
+// CHECK_CONSTRAINED-NEXT:    store i64 [[TMP19]], ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], align 8
+// CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR10]], i32 8
+// CHECK_CONSTRAINED-NEXT:    store i128 [[TMP9]], ptr addrspace(1) [[PRINTBUFFNEXTPTR11]], align 8
+// CHECK_CONSTRAINED-NEXT:    [[PRINTBUFFNEXTPTR12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PRINTBUFFNEXTPTR11]], i32 16
 // CHECK_CONSTRAINED-NEXT:    br label [[END_BLOCK]]
 //
 __device__ int foo3() {

>From 7956e64b78fb3eb898a66f1623ede75abe88eeca Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva at intel.com>
Date: Fri, 12 Jul 2024 09:19:57 +0200
Subject: [PATCH 20/21] Update clang/lib/CodeGen/CGExprConstant.cpp

Co-authored-by: John McCall <rjmccall at gmail.com>
---
 clang/lib/CodeGen/CGExprConstant.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 65effedde47a9..066f99ffa782a 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -615,7 +615,7 @@ bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
 
   llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(C);
   if (!CI) {
-    // Constants long _BitInt types are sometimes split into individual bytes.
+    // Constants for long _BitInt types are sometimes split into individual bytes.
     // Try to fold these back into an integer constant. If that doesn't work
     // out, then we are trying to initialize a bitfield with a non-trivial
     // constant, this must require run-time code.

>From 5c4b58b2bb281529b7d87c32fda5fe21b3451948 Mon Sep 17 00:00:00 2001
From: "Podchishchaeva, Mariya" <mariya.podchishchaeva at intel.com>
Date: Fri, 12 Jul 2024 00:36:38 -0700
Subject: [PATCH 21/21] Fix format

---
 clang/lib/CodeGen/CGExprConstant.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 066f99ffa782a..dba98d0985fb1 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -615,9 +615,9 @@ bool ConstStructBuilder::AppendBitField(const FieldDecl *Field,
 
   llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(C);
   if (!CI) {
-    // Constants for long _BitInt types are sometimes split into individual bytes.
-    // Try to fold these back into an integer constant. If that doesn't work
-    // out, then we are trying to initialize a bitfield with a non-trivial
+    // Constants for long _BitInt types are sometimes split into individual
+    // bytes. Try to fold these back into an integer constant. If that doesn't
+    // work out, then we are trying to initialize a bitfield with a non-trivial
     // constant, this must require run-time code.
     llvm::Type *LoadType =
         CGM.getTypes().convertTypeForLoadStore(Field->getType(), C->getType());



More information about the cfe-commits mailing list