[Mlir-commits] [clang] [clang-tools-extra] [flang] [lld] [llvm] [mlir] [mlir][vector] Add deinterleave operation to vector dialect (PR #92409)

Tue May 21 08:26:05 PDT 2024

Timm =?utf-8?q?Bäder?= <tbaeder at redhat.com>,Jacek Caban
 <jacek at codeweavers.com>,Timm =?utf-8?q?Bäder?= <tbaeder at redhat.com>,Vlad
 Serebrennikov <serebrennikov.vladislav at gmail.com>,YunQiang Su
 <syq at debian.org>,Jay Foad <jay.foad at amd.com>,Jay Foad <jay.foad at amd.com>,Vlad
 Serebrennikov <serebrennikov.vladislav at gmail.com>,Jay Foad <jay.foad at amd.com>
 =?utf-8?q?,?=Nikita Popov <npopov at redhat.com>,Nikita Popov
 <npopov at redhat.com>,Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>,Nikita
 Popov <npopov at redhat.com>,Nikita Popov <npopov at redhat.com>,Nikita Popov
 <npopov at redhat.com>,Nikita Popov <npopov at redhat.com>,Nikita Popov
 <npopov at redhat.com>,Qiongsi Wu <274595+qiongsiwu at users.noreply.github.com>,Nikita
 Popov <npopov at redhat.com>,Erich Keane <ekeane at nvidia.com>,Nikita Popov
 <npopov at redhat.com>,Nikita Popov <npopov at redhat.com>,Vlad Serebrennikov
 <serebrennikov.vladislav at gmail.com>,Nikita Popov <npopov at redhat.com>,Mubashar.Ahmad at arm.com
 <mubashar.ahmad at arm.com>=?utf-8?q?,?="Mubashar.Ahmad at arm.com"
 <mubashar.ahmad at arm.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/92409 at github.com>


https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/92409

>From a7521fd162cac93da37df9151d233692fd61998f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 17 May 2024 16:07:11 +0200
Subject: [PATCH 01/27] [clang][Interp] Implement
 __builtin_sycl_unique_stable_name

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 24 ++++++++++++++++++++++++
 clang/lib/AST/Interp/ByteCodeExprGen.h   |  1 +
 clang/test/AST/Interp/sycl.cpp           |  9 +++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 clang/test/AST/Interp/sycl.cpp

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index a61270c77ea8f..33d69d04487de 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1594,6 +1594,30 @@ bool ByteCodeExprGen<Emitter>::VisitStringLiteral(const StringLiteral *E) {
   return true;
 }
 
+template <class Emitter>
+bool ByteCodeExprGen<Emitter>::VisitSYCLUniqueStableNameExpr(
+    const SYCLUniqueStableNameExpr *E) {
+  if (DiscardResult)
+    return true;
+
+  assert(!Initializing);
+
+  auto &A = Ctx.getASTContext();
+  std::string ResultStr = E->ComputeName(A);
+
+  QualType CharTy = A.CharTy.withConst();
+  APInt Size(A.getTypeSize(A.getSizeType()), ResultStr.size() + 1);
+  QualType ArrayTy = A.getConstantArrayType(CharTy, Size, nullptr,
+                                            ArraySizeModifier::Normal, 0);
+
+  StringLiteral *SL =
+      StringLiteral::Create(A, ResultStr, StringLiteralKind::Ordinary,
+                            /*Pascal=*/false, ArrayTy, E->getLocation());
+
+  unsigned StringIndex = P.createGlobalString(SL);
+  return this->emitGetPtrGlobal(StringIndex, E);
+}
+
 template <class Emitter>
 bool ByteCodeExprGen<Emitter>::VisitCharacterLiteral(
     const CharacterLiteral *E) {
diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h
index e73a2f0334cf6..a2e283c866332 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.h
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.h
@@ -90,6 +90,7 @@ class ByteCodeExprGen : public ConstStmtVisitor<ByteCodeExprGen<Emitter>, bool>,
   bool VisitOpaqueValueExpr(const OpaqueValueExpr *E);
   bool VisitAbstractConditionalOperator(const AbstractConditionalOperator *E);
   bool VisitStringLiteral(const StringLiteral *E);
+  bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E);
   bool VisitCharacterLiteral(const CharacterLiteral *E);
   bool VisitCompoundAssignOperator(const CompoundAssignOperator *E);
   bool VisitFloatCompoundAssignOperator(const CompoundAssignOperator *E);
diff --git a/clang/test/AST/Interp/sycl.cpp b/clang/test/AST/Interp/sycl.cpp
new file mode 100644
index 0000000000000..5c922eca58091
--- /dev/null
+++ b/clang/test/AST/Interp/sycl.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -std=c++17 -triple x86_64-linux-gnu -fsycl-is-device -verify=both,ref -fsyntax-only -Wno-unused
+// RUN: %clang_cc1 %s -std=c++17 -triple x86_64-linux-gnu -fsycl-is-device -verify=both,expected -fsyntax-only -Wno-unused -fexperimental-new-constant-interpreter
+
+// both-no-diagnostics
+
+constexpr int a = 0;
+constexpr const char *a_name = __builtin_sycl_unique_stable_name(decltype(a));
+static_assert(__builtin_strcmp(a_name, "_ZTSKi") == 0);
+

>From 18e7bcbae12bc2e2cf9888844a0b3f12075f508c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Fri, 17 May 2024 16:53:24 +0200
Subject: [PATCH 02/27] [clang][Interp] Reject inc/dec ops before C++ 14

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 12 ++++++++++++
 clang/test/AST/Interp/cxx11.cpp          | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 33d69d04487de..859a3fabea32b 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -3395,6 +3395,9 @@ bool ByteCodeExprGen<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
 
   switch (E->getOpcode()) {
   case UO_PostInc: { // x++
+    if (!Ctx.getLangOpts().CPlusPlus14)
+      return this->emitInvalid(E);
+
     if (!this->visit(SubExpr))
       return false;
 
@@ -3413,6 +3416,9 @@ bool ByteCodeExprGen<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
     return DiscardResult ? this->emitIncPop(*T, E) : this->emitInc(*T, E);
   }
   case UO_PostDec: { // x--
+    if (!Ctx.getLangOpts().CPlusPlus14)
+      return this->emitInvalid(E);
+
     if (!this->visit(SubExpr))
       return false;
 
@@ -3431,6 +3437,9 @@ bool ByteCodeExprGen<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
     return DiscardResult ? this->emitDecPop(*T, E) : this->emitDec(*T, E);
   }
   case UO_PreInc: { // ++x
+    if (!Ctx.getLangOpts().CPlusPlus14)
+      return this->emitInvalid(E);
+
     if (!this->visit(SubExpr))
       return false;
 
@@ -3475,6 +3484,9 @@ bool ByteCodeExprGen<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
     return E->isGLValue() || this->emitLoadPop(*T, E);
   }
   case UO_PreDec: { // --x
+    if (!Ctx.getLangOpts().CPlusPlus14)
+      return this->emitInvalid(E);
+
     if (!this->visit(SubExpr))
       return false;
 
diff --git a/clang/test/AST/Interp/cxx11.cpp b/clang/test/AST/Interp/cxx11.cpp
index 993e3618a3784..f06a5dd173cba 100644
--- a/clang/test/AST/Interp/cxx11.cpp
+++ b/clang/test/AST/Interp/cxx11.cpp
@@ -30,3 +30,19 @@ constexpr S s = { 5 };
 constexpr const int *p = &s.m + 1;
 
 constexpr const int *np2 = &(*(int(*)[4])nullptr)[0]; // ok
+
+constexpr int preDec(int x) { // both-error {{never produces a constant expression}}
+  return --x;                 // both-note {{subexpression}}
+}
+
+constexpr int postDec(int x) { // both-error {{never produces a constant expression}}
+  return x--;                  // both-note {{subexpression}}
+}
+
+constexpr int preInc(int x) { // both-error {{never produces a constant expression}}
+  return ++x;                  // both-note {{subexpression}}
+}
+
+constexpr int postInc(int x) { // both-error {{never produces a constant expression}}
+  return x++;                  // both-note {{subexpression}}
+}

>From 5693678cae86ac433aa8bd9ed3920c8c93b5817b Mon Sep 17 00:00:00 2001
From: Jacek Caban <jacek at codeweavers.com>
Date: Tue, 21 May 2024 13:33:53 +0200
Subject: [PATCH 03/27] [LLD][COFF] Demangle ARM64EC export names. (#87068)

---
 lld/COFF/DriverUtils.cpp        |  40 ++++++-----
 lld/test/COFF/arm64ec-exports.s | 121 ++++++++++++++++++++++++++++++++
 2 files changed, 145 insertions(+), 16 deletions(-)
 create mode 100644 lld/test/COFF/arm64ec-exports.s

diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index b4ff31a606da5..6e8f74c83be4a 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/BinaryFormat/COFF.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/WindowsResource.h"
 #include "llvm/Option/Arg.h"
@@ -39,6 +40,7 @@
 #include <optional>
 
 using namespace llvm::COFF;
+using namespace llvm::object;
 using namespace llvm::opt;
 using namespace llvm;
 using llvm::sys::Process;
@@ -632,18 +634,6 @@ Export LinkerDriver::parseExport(StringRef arg) {
   fatal("invalid /export: " + arg);
 }
 
-static StringRef undecorate(COFFLinkerContext &ctx, StringRef sym) {
-  if (ctx.config.machine != I386)
-    return sym;
-  // In MSVC mode, a fully decorated stdcall function is exported
-  // as-is with the leading underscore (with type IMPORT_NAME).
-  // In MinGW mode, a decorated stdcall function gets the underscore
-  // removed, just like normal cdecl functions.
-  if (sym.starts_with("_") && sym.contains('@') && !ctx.config.mingw)
-    return sym;
-  return sym.starts_with("_") ? sym.substr(1) : sym;
-}
-
 // Convert stdcall/fastcall style symbols into unsuffixed symbols,
 // with or without a leading underscore. (MinGW specific.)
 static StringRef killAt(StringRef sym, bool prefix) {
@@ -693,11 +683,29 @@ void LinkerDriver::fixupExports() {
   for (Export &e : ctx.config.exports) {
     if (!e.exportAs.empty()) {
       e.exportName = e.exportAs;
-    } else if (!e.forwardTo.empty()) {
-      e.exportName = undecorate(ctx, e.name);
-    } else {
-      e.exportName = undecorate(ctx, e.extName.empty() ? e.name : e.extName);
+      continue;
+    }
+
+    StringRef sym =
+        !e.forwardTo.empty() || e.extName.empty() ? e.name : e.extName;
+    if (ctx.config.machine == I386 && sym.starts_with("_")) {
+      // In MSVC mode, a fully decorated stdcall function is exported
+      // as-is with the leading underscore (with type IMPORT_NAME).
+      // In MinGW mode, a decorated stdcall function gets the underscore
+      // removed, just like normal cdecl functions.
+      if (ctx.config.mingw || !sym.contains('@')) {
+        e.exportName = sym.substr(1);
+        continue;
+      }
+    }
+    if (isArm64EC(ctx.config.machine) && !e.data && !e.constant) {
+      if (std::optional<std::string> demangledName =
+              getArm64ECDemangledFunctionName(sym)) {
+        e.exportName = saver().save(*demangledName);
+        continue;
+      }
     }
+    e.exportName = sym;
   }
 
   if (ctx.config.killAt && ctx.config.machine == I386) {
diff --git a/lld/test/COFF/arm64ec-exports.s b/lld/test/COFF/arm64ec-exports.s
new file mode 100644
index 0000000000000..a48211e6fb76c
--- /dev/null
+++ b/lld/test/COFF/arm64ec-exports.s
@@ -0,0 +1,121 @@
+; REQUIRES: aarch64
+; RUN: split-file %s %t.dir && cd %t.dir
+
+; RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test.s -o test.obj
+; RUN: llvm-mc -filetype=obj -triple=arm64ec-windows drectve.s -o drectve.obj
+; RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+
+; Check various forms of export directive and make sure that function export name is demangled.
+
+; RUN: lld-link -out:out.dll test.obj loadconfig-arm64ec.obj -dll -noentry -machine:arm64ec \
+; RUN:          -export:unmangled_func '-export:#mangled_func' '-export:#exportas_func,EXPORTAS,exportas_func' \
+; RUN:          '-export:?cxx_func@@$$hYAHXZ' -export:data_sym,DATA '-export:#mangled_data_sym,DATA'
+
+
+; RUN: llvm-readobj --coff-exports out.dll | FileCheck --check-prefix=EXP %s
+; EXP:      Export {
+; EXP-NEXT:   Ordinal: 1
+; EXP-NEXT:   Name: #mangled_data_sym
+; EXP-NEXT:   RVA: 0x3000
+; EXP-NEXT: }
+; EXP-NEXT: Export {
+; EXP-NEXT:   Ordinal: 2
+; EXP-NEXT:   Name: ?cxx_func@@YAHXZ
+; EXP-NEXT:   RVA: 0x1018
+; EXP-NEXT: }
+; EXP-NEXT: Export {
+; EXP-NEXT:   Ordinal: 3
+; EXP-NEXT:   Name: data_sym
+; EXP-NEXT:   RVA: 0x3004
+; EXP-NEXT: }
+; EXP-NEXT: Export {
+; EXP-NEXT:   Ordinal: 4
+; EXP-NEXT:   Name: exportas_func
+; EXP-NEXT:   RVA: 0x1010
+; EXP-NEXT: }
+; EXP-NEXT: Export {
+; EXP-NEXT:   Ordinal: 5
+; EXP-NEXT:   Name: mangled_func
+; EXP-NEXT:   RVA: 0x1008
+; EXP-NEXT: }
+; EXP-NEXT: Export {
+; EXP-NEXT:   Ordinal: 6
+; EXP-NEXT:   Name: unmangled_func
+; EXP-NEXT:   RVA: 0x1000
+; EXP-NEXT: }
+
+; RUN: llvm-nm --print-armap out.lib | FileCheck --check-prefix=IMPLIB %s
+; IMPLIB:      Archive EC map
+; IMPLIB-NEXT: #exportas_func in out
+; IMPLIB-NEXT: #mangled_func in out
+; IMPLIB-NEXT: #unmangled_func in out
+; IMPLIB-NEXT: ?cxx_func@@$$hYAHXZ in out
+; IMPLIB-NEXT: ?cxx_func@@YAHXZ in out
+; IMPLIB-NEXT: __IMPORT_DESCRIPTOR_out{{.*}} in out
+; IMPLIB-NEXT: __NULL_IMPORT_DESCRIPTOR in out
+; IMPLIB-NEXT: __imp_?cxx_func@@YAHXZ in out
+; IMPLIB-NEXT: __imp_aux_?cxx_func@@YAHXZ in out
+; IMPLIB-NEXT: __imp_aux_exportas_func in out
+; IMPLIB-NEXT: __imp_aux_mangled_func in out
+; IMPLIB-NEXT: __imp_aux_unmangled_func in out
+; IMPLIB-NEXT: __imp_data_sym in out
+; IMPLIB-NEXT: __imp_exportas_func in out
+; IMPLIB-NEXT: __imp_mangled_data_sym in out
+; IMPLIB-NEXT: __imp_mangled_func in out
+; IMPLIB-NEXT: __imp_unmangled_func in out
+; IMPLIB-NEXT: exportas_func in out
+; IMPLIB-NEXT: mangled_func in out
+; IMPLIB-NEXT: unmangled_func in out
+; IMPLIB-NEXT: out{{.*}}_NULL_THUNK_DATA in out
+
+
+; Check that using .drectve section has the same effect.
+
+; RUN: lld-link -out:out2.dll test.obj loadconfig-arm64ec.obj -dll -noentry -machine:arm64ec drectve.obj
+; RUN: llvm-readobj --coff-exports out2.dll | FileCheck --check-prefix=EXP %s
+; RUN: llvm-nm --print-armap out2.lib | FileCheck --check-prefix=IMPLIB %s
+
+#--- test.s
+        .text
+        .globl unmangled_func
+        .p2align 2, 0x0
+unmangled_func:
+        mov w0, #1
+        ret
+
+        .globl "#mangled_func"
+        .p2align 2, 0x0
+"#mangled_func":
+        mov w0, #2
+        ret
+
+        .globl "#exportas_func"
+        .p2align 2, 0x0
+"#exportas_func":
+        mov w0, #3
+        ret
+
+        .globl "?cxx_func@@$$hYAHXZ"
+        .p2align 2, 0x0
+"?cxx_func@@$$hYAHXZ":
+        mov w0, #4
+        ret
+
+        .data
+        .globl "#mangled_data_sym"
+        .p2align 2, 0x0
+"#mangled_data_sym":
+        .word 0x01010101
+        .globl data_sym
+        .p2align 2, 0x0
+data_sym:
+        .word 0x01010101
+
+#--- drectve.s
+        .section .drectve, "yn"
+        .ascii " -export:unmangled_func"
+        .ascii " -export:#mangled_func"
+        .ascii " -export:#exportas_func,EXPORTAS,exportas_func"
+        .ascii " -export:?cxx_func@@$$hYAHXZ"
+        .ascii " -export:data_sym,DATA"
+        .ascii " -export:#mangled_data_sym,DATA"

>From 46d8bb08cfd3798977b4e22881514dc9d77425c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sat, 18 May 2024 06:04:31 +0200
Subject: [PATCH 04/27] [clang][Interp] Fix initializing zero-size arrays

---
 clang/lib/AST/Interp/Pointer.h            |  6 +++++-
 clang/test/AST/Interp/arrays.cpp          |  1 +
 clang/unittests/AST/Interp/Descriptor.cpp | 24 +++++++++++++++++++++--
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h
index 9900f37e60d4e..3ade5756e580e 100644
--- a/clang/lib/AST/Interp/Pointer.h
+++ b/clang/lib/AST/Interp/Pointer.h
@@ -556,12 +556,16 @@ class Pointer {
     if (!asBlockPointer().Pointee)
       return false;
 
-    return isElementPastEnd() || getSize() == getOffset();
+    return isElementPastEnd() ||
+           (getSize() == getOffset() && !isZeroSizeArray());
   }
 
   /// Checks if the pointer is an out-of-bounds element pointer.
   bool isElementPastEnd() const { return Offset == PastEndMark; }
 
+  /// Checks if the pointer is pointing to a zero-size array.
+  bool isZeroSizeArray() const { return getFieldDesc()->isZeroSizeArray(); }
+
   /// Dereferences the pointer, if it's live.
   template <typename T> T &deref() const {
     assert(isLive() && "Invalid pointer");
diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp
index 71b6dabf39e45..e936ec6dc894b 100644
--- a/clang/test/AST/Interp/arrays.cpp
+++ b/clang/test/AST/Interp/arrays.cpp
@@ -26,6 +26,7 @@ static_assert(foo[2][2] == nullptr, "");
 static_assert(foo[2][3] == &m, "");
 static_assert(foo[2][4] == nullptr, "");
 
+constexpr int ZeroSizeArray[] = {};
 
 constexpr int SomeInt[] = {1};
 constexpr int getSomeInt() { return *SomeInt; }
diff --git a/clang/unittests/AST/Interp/Descriptor.cpp b/clang/unittests/AST/Interp/Descriptor.cpp
index 053d579ea3919..3157b4d401f98 100644
--- a/clang/unittests/AST/Interp/Descriptor.cpp
+++ b/clang/unittests/AST/Interp/Descriptor.cpp
@@ -22,9 +22,10 @@ TEST(Descriptor, Primitives) {
       "  char s[4];\n"
       "  A a[3];\n"
       "  short l[3][3];\n"
+      "  int EmptyA[0];\n"
       "};\n"
       "constexpr S d = {0.0, \"foo\", {{true, false}, {false, true}, {false, false}},\n"
-      "  {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}};\n";
+      "  {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}, {}};\n";
 
   auto AST = tooling::buildASTFromCodeWithArgs(
       Code, {"-fexperimental-new-constant-interpreter"});
@@ -64,7 +65,7 @@ TEST(Descriptor, Primitives) {
   // Test the Record for the struct S.
   const Record *SRecord = GlobalDesc->ElemRecord;
   ASSERT_TRUE(SRecord);
-  ASSERT_TRUE(SRecord->getNumFields() == 4);
+  ASSERT_TRUE(SRecord->getNumFields() == 5);
   ASSERT_TRUE(SRecord->getNumBases() == 0);
   ASSERT_FALSE(SRecord->getDestructor());
 
@@ -113,6 +114,16 @@ TEST(Descriptor, Primitives) {
   ASSERT_TRUE(F4->Desc->getElemSize() > 0);
   ASSERT_TRUE(F4->Desc->ElemDesc->isPrimitiveArray());
 
+  // Fifth field. Zero-size array.
+  const Record::Field *F5 = SRecord->getField(4u);
+  ASSERT_TRUE(F5);
+  ASSERT_FALSE(F5->isBitField());
+  ASSERT_TRUE(F5->Desc->isArray());
+  ASSERT_FALSE(F5->Desc->isCompositeArray());
+  ASSERT_TRUE(F5->Desc->isPrimitiveArray());
+  ASSERT_FALSE(F5->Desc->isPrimitive());
+  ASSERT_EQ(F5->Desc->getNumElems(), 0u);
+
   // Check pointer stuff.
   // Global variables have an inline descriptor.
   ASSERT_TRUE(GlobalPtr.isRoot());
@@ -382,4 +393,13 @@ TEST(Descriptor, Primitives) {
     ASSERT_EQ(PE3.getArray(), NE3);
     ASSERT_EQ(PE3.getIndex(), 2u);
   }
+
+  // Zero-size array.
+  {
+    const Pointer &PF5 = GlobalPtr.atField(F5->Offset);
+
+    ASSERT_TRUE(PF5.isZeroSizeArray());
+    ASSERT_FALSE(PF5.isOnePastEnd());
+    ASSERT_FALSE(PF5.isElementPastEnd());
+  }
 }

>From 1664610130b88ef168e33eddfe973a3f11bd4261 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Tue, 21 May 2024 16:03:56 +0400
Subject: [PATCH 05/27] [clang] Introduce `SemaPseudoObject` (#92646)

This patch moves `Sema` functions that handle pseudo-objects into the
new `SemaPseudoObject` class. This continues previous efforts to split
`Sema` up. Additional context can be found in #84184.
As usual, in order to help reviewing this, formatting changes are split
into a separate commit.
---
 clang/include/clang/Sema/Sema.h             | 57 ++++++----------
 clang/include/clang/Sema/SemaPseudoObject.h | 40 +++++++++++
 clang/lib/Sema/Sema.cpp                     |  2 +
 clang/lib/Sema/SemaExpr.cpp                 |  9 +--
 clang/lib/Sema/SemaPseudoObject.cpp         | 75 +++++++++++----------
 clang/lib/Sema/TreeTransform.h              |  9 +--
 6 files changed, 112 insertions(+), 80 deletions(-)
 create mode 100644 clang/include/clang/Sema/SemaPseudoObject.h

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5894239664c15..01ddba5eaf01d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -174,6 +174,7 @@ class SemaHLSL;
 class SemaObjC;
 class SemaOpenACC;
 class SemaOpenMP;
+class SemaPseudoObject;
 class SemaSYCL;
 class StandardConversionSequence;
 class Stmt;
@@ -471,20 +472,19 @@ class Sema final : public SemaBase {
   // 18. Name Lookup (SemaLookup.cpp)
   // 19. Modules (SemaModule.cpp)
   // 20. C++ Overloading (SemaOverload.cpp)
-  // 21. Pseudo-Object (SemaPseudoObject.cpp)
-  // 22. Statements (SemaStmt.cpp)
-  // 23. `inline asm` Statement (SemaStmtAsm.cpp)
-  // 24. Statement Attribute Handling (SemaStmtAttr.cpp)
-  // 25. C++ Templates (SemaTemplate.cpp)
-  // 26. C++ Template Argument Deduction (SemaTemplateDeduction.cpp)
-  // 27. C++ Template Instantiation (SemaTemplateInstantiate.cpp)
-  // 28. C++ Template Declaration Instantiation
+  // 21. Statements (SemaStmt.cpp)
+  // 22. `inline asm` Statement (SemaStmtAsm.cpp)
+  // 23. Statement Attribute Handling (SemaStmtAttr.cpp)
+  // 24. C++ Templates (SemaTemplate.cpp)
+  // 25. C++ Template Argument Deduction (SemaTemplateDeduction.cpp)
+  // 26. C++ Template Instantiation (SemaTemplateInstantiate.cpp)
+  // 27. C++ Template Declaration Instantiation
   //     (SemaTemplateInstantiateDecl.cpp)
-  // 29. C++ Variadic Templates (SemaTemplateVariadic.cpp)
-  // 30. Constraints and Concepts (SemaConcept.cpp)
-  // 31. Types (SemaType.cpp)
-  // 32. FixIt Helpers (SemaFixItUtils.cpp)
-  // 33. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp)
+  // 28. C++ Variadic Templates (SemaTemplateVariadic.cpp)
+  // 29. Constraints and Concepts (SemaConcept.cpp)
+  // 30. Types (SemaType.cpp)
+  // 31. FixIt Helpers (SemaFixItUtils.cpp)
+  // 32. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp)
 
   /// \name Semantic Analysis
   /// Implementations are in Sema.cpp
@@ -1015,6 +1015,11 @@ class Sema final : public SemaBase {
     return *OpenMPPtr;
   }
 
+  SemaPseudoObject &PseudoObject() {
+    assert(PseudoObjectPtr);
+    return *PseudoObjectPtr;
+  }
+
   SemaSYCL &SYCL() {
     assert(SYCLPtr);
     return *SYCLPtr;
@@ -1056,6 +1061,7 @@ class Sema final : public SemaBase {
   std::unique_ptr<SemaObjC> ObjCPtr;
   std::unique_ptr<SemaOpenACC> OpenACCPtr;
   std::unique_ptr<SemaOpenMP> OpenMPPtr;
+  std::unique_ptr<SemaPseudoObject> PseudoObjectPtr;
   std::unique_ptr<SemaSYCL> SYCLPtr;
 
   ///@}
@@ -6369,6 +6375,8 @@ class Sema final : public SemaBase {
   llvm::SmallVector<std::pair<SourceLocation, const BlockDecl *>, 1>
       ImplicitlyRetainedSelfLocs;
 
+  void maybeExtendBlockObject(ExprResult &E);
+
 private:
   static BinaryOperatorKind ConvertTokenKindToBinaryOpcode(tok::TokenKind Kind);
 
@@ -8367,29 +8375,6 @@ class Sema final : public SemaBase {
   //
   //
 
-  /// \name Pseudo-Object
-  /// Implementations are in SemaPseudoObject.cpp
-  ///@{
-
-public:
-  void maybeExtendBlockObject(ExprResult &E);
-
-  ExprResult checkPseudoObjectIncDec(Scope *S, SourceLocation OpLoc,
-                                     UnaryOperatorKind Opcode, Expr *Op);
-  ExprResult checkPseudoObjectAssignment(Scope *S, SourceLocation OpLoc,
-                                         BinaryOperatorKind Opcode, Expr *LHS,
-                                         Expr *RHS);
-  ExprResult checkPseudoObjectRValue(Expr *E);
-  Expr *recreateSyntacticForm(PseudoObjectExpr *E);
-
-  ///@}
-
-  //
-  //
-  // -------------------------------------------------------------------------
-  //
-  //
-
   /// \name Statements
   /// Implementations are in SemaStmt.cpp
   ///@{
diff --git a/clang/include/clang/Sema/SemaPseudoObject.h b/clang/include/clang/Sema/SemaPseudoObject.h
new file mode 100644
index 0000000000000..22d8be2b3726e
--- /dev/null
+++ b/clang/include/clang/Sema/SemaPseudoObject.h
@@ -0,0 +1,40 @@
+//===----- SemaPseudoObject.h --- Semantic Analysis for Pseudo-Objects ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares semantic analysis for expressions involving
+//  pseudo-object references.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SEMA_SEMAPSEUDOOBJECT_H
+#define LLVM_CLANG_SEMA_SEMAPSEUDOOBJECT_H
+
+#include "clang/AST/Expr.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Sema/Ownership.h"
+#include "clang/Sema/Scope.h"
+#include "clang/Sema/SemaBase.h"
+
+namespace clang {
+
+class SemaPseudoObject : public SemaBase {
+public:
+  SemaPseudoObject(Sema &S);
+
+  ExprResult checkIncDec(Scope *S, SourceLocation OpLoc,
+                         UnaryOperatorKind Opcode, Expr *Op);
+  ExprResult checkAssignment(Scope *S, SourceLocation OpLoc,
+                             BinaryOperatorKind Opcode, Expr *LHS, Expr *RHS);
+  ExprResult checkRValue(Expr *E);
+  Expr *recreateSyntacticForm(PseudoObjectExpr *E);
+};
+
+} // namespace clang
+
+#endif // LLVM_CLANG_SEMA_SEMAPSEUDOOBJECT_H
\ No newline at end of file
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index f847c49920cf3..2c5774da3f666 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -49,6 +49,7 @@
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenACC.h"
 #include "clang/Sema/SemaOpenMP.h"
+#include "clang/Sema/SemaPseudoObject.h"
 #include "clang/Sema/SemaSYCL.h"
 #include "clang/Sema/TemplateDeduction.h"
 #include "clang/Sema/TemplateInstCallback.h"
@@ -210,6 +211,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       ObjCPtr(std::make_unique<SemaObjC>(*this)),
       OpenACCPtr(std::make_unique<SemaOpenACC>(*this)),
       OpenMPPtr(std::make_unique<SemaOpenMP>(*this)),
+      PseudoObjectPtr(std::make_unique<SemaPseudoObject>(*this)),
       SYCLPtr(std::make_unique<SemaSYCL>(*this)),
       MSPointerToMemberRepresentationMethod(
           LangOpts.getMSPointerToMemberRepresentationMethod()),
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index e7731e389c1ba..7bb34fd7a4794 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -54,6 +54,7 @@
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenMP.h"
+#include "clang/Sema/SemaPseudoObject.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLForwardCompat.h"
@@ -15239,7 +15240,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
   LHSExpr = LHS.get();
   RHSExpr = RHS.get();
 
-  // We want to end up calling one of checkPseudoObjectAssignment
+  // We want to end up calling one of SemaPseudoObject::checkAssignment
   // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if
   // both expressions are overloadable or either is type-dependent),
   // or CreateBuiltinBinOp (in any other case).  We also want to get
@@ -15250,7 +15251,7 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc,
     // Assignments with a pseudo-object l-value need special analysis.
     if (pty->getKind() == BuiltinType::PseudoObject &&
         BinaryOperator::isAssignmentOp(Opc))
-      return checkPseudoObjectAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr);
+      return PseudoObject().checkAssignment(S, OpLoc, Opc, LHSExpr, RHSExpr);
 
     // Don't resolve overloads if the other type is overloadable.
     if (getLangOpts().CPlusPlus && pty->getKind() == BuiltinType::Overload) {
@@ -15673,7 +15674,7 @@ ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc,
     // Increment and decrement of pseudo-object references.
     if (pty->getKind() == BuiltinType::PseudoObject &&
         UnaryOperator::isIncrementDecrementOp(Opc))
-      return checkPseudoObjectIncDec(S, OpLoc, Opc, Input);
+      return PseudoObject().checkIncDec(S, OpLoc, Opc, Input);
 
     // extension is always a builtin operator.
     if (Opc == UO_Extension)
@@ -20890,7 +20891,7 @@ ExprResult Sema::CheckPlaceholderExpr(Expr *E) {
 
   // Pseudo-objects.
   case BuiltinType::PseudoObject:
-    return checkPseudoObjectRValue(E);
+    return PseudoObject().checkRValue(E);
 
   case BuiltinType::BuiltinFn: {
     // Accept __noop without parens by implicitly converting it to a call expr.
diff --git a/clang/lib/Sema/SemaPseudoObject.cpp b/clang/lib/Sema/SemaPseudoObject.cpp
index 14ed9590afc6c..fdb584ceb8105 100644
--- a/clang/lib/Sema/SemaPseudoObject.cpp
+++ b/clang/lib/Sema/SemaPseudoObject.cpp
@@ -29,6 +29,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Sema/SemaPseudoObject.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/Basic/CharInfo.h"
@@ -1446,24 +1447,24 @@ ExprResult MSPropertyOpBuilder::buildSet(Expr *op, SourceLocation sl,
 //  General Sema routines.
 //===----------------------------------------------------------------------===//
 
-ExprResult Sema::checkPseudoObjectRValue(Expr *E) {
+ExprResult SemaPseudoObject::checkRValue(Expr *E) {
   Expr *opaqueRef = E->IgnoreParens();
   if (ObjCPropertyRefExpr *refExpr
         = dyn_cast<ObjCPropertyRefExpr>(opaqueRef)) {
-    ObjCPropertyOpBuilder builder(*this, refExpr, true);
+    ObjCPropertyOpBuilder builder(SemaRef, refExpr, true);
     return builder.buildRValueOperation(E);
   }
   else if (ObjCSubscriptRefExpr *refExpr
            = dyn_cast<ObjCSubscriptRefExpr>(opaqueRef)) {
-    ObjCSubscriptOpBuilder builder(*this, refExpr, true);
+    ObjCSubscriptOpBuilder builder(SemaRef, refExpr, true);
     return builder.buildRValueOperation(E);
   } else if (MSPropertyRefExpr *refExpr
              = dyn_cast<MSPropertyRefExpr>(opaqueRef)) {
-    MSPropertyOpBuilder builder(*this, refExpr, true);
+    MSPropertyOpBuilder builder(SemaRef, refExpr, true);
     return builder.buildRValueOperation(E);
   } else if (MSPropertySubscriptExpr *RefExpr =
                  dyn_cast<MSPropertySubscriptExpr>(opaqueRef)) {
-    MSPropertyOpBuilder Builder(*this, RefExpr, true);
+    MSPropertyOpBuilder Builder(SemaRef, RefExpr, true);
     return Builder.buildRValueOperation(E);
   } else {
     llvm_unreachable("unknown pseudo-object kind!");
@@ -1471,48 +1472,48 @@ ExprResult Sema::checkPseudoObjectRValue(Expr *E) {
 }
 
 /// Check an increment or decrement of a pseudo-object expression.
-ExprResult Sema::checkPseudoObjectIncDec(Scope *Sc, SourceLocation opcLoc,
+ExprResult SemaPseudoObject::checkIncDec(Scope *Sc, SourceLocation opcLoc,
                                          UnaryOperatorKind opcode, Expr *op) {
   // Do nothing if the operand is dependent.
   if (op->isTypeDependent())
-    return UnaryOperator::Create(Context, op, opcode, Context.DependentTy,
-                                 VK_PRValue, OK_Ordinary, opcLoc, false,
-                                 CurFPFeatureOverrides());
+    return UnaryOperator::Create(
+        SemaRef.Context, op, opcode, SemaRef.Context.DependentTy, VK_PRValue,
+        OK_Ordinary, opcLoc, false, SemaRef.CurFPFeatureOverrides());
 
   assert(UnaryOperator::isIncrementDecrementOp(opcode));
   Expr *opaqueRef = op->IgnoreParens();
   if (ObjCPropertyRefExpr *refExpr
         = dyn_cast<ObjCPropertyRefExpr>(opaqueRef)) {
-    ObjCPropertyOpBuilder builder(*this, refExpr, false);
+    ObjCPropertyOpBuilder builder(SemaRef, refExpr, false);
     return builder.buildIncDecOperation(Sc, opcLoc, opcode, op);
   } else if (isa<ObjCSubscriptRefExpr>(opaqueRef)) {
     Diag(opcLoc, diag::err_illegal_container_subscripting_op);
     return ExprError();
   } else if (MSPropertyRefExpr *refExpr
              = dyn_cast<MSPropertyRefExpr>(opaqueRef)) {
-    MSPropertyOpBuilder builder(*this, refExpr, false);
+    MSPropertyOpBuilder builder(SemaRef, refExpr, false);
     return builder.buildIncDecOperation(Sc, opcLoc, opcode, op);
   } else if (MSPropertySubscriptExpr *RefExpr
              = dyn_cast<MSPropertySubscriptExpr>(opaqueRef)) {
-    MSPropertyOpBuilder Builder(*this, RefExpr, false);
+    MSPropertyOpBuilder Builder(SemaRef, RefExpr, false);
     return Builder.buildIncDecOperation(Sc, opcLoc, opcode, op);
   } else {
     llvm_unreachable("unknown pseudo-object kind!");
   }
 }
 
-ExprResult Sema::checkPseudoObjectAssignment(Scope *S, SourceLocation opcLoc,
+ExprResult SemaPseudoObject::checkAssignment(Scope *S, SourceLocation opcLoc,
                                              BinaryOperatorKind opcode,
                                              Expr *LHS, Expr *RHS) {
   // Do nothing if either argument is dependent.
   if (LHS->isTypeDependent() || RHS->isTypeDependent())
-    return BinaryOperator::Create(Context, LHS, RHS, opcode,
-                                  Context.DependentTy, VK_PRValue, OK_Ordinary,
-                                  opcLoc, CurFPFeatureOverrides());
+    return BinaryOperator::Create(
+        SemaRef.Context, LHS, RHS, opcode, SemaRef.Context.DependentTy,
+        VK_PRValue, OK_Ordinary, opcLoc, SemaRef.CurFPFeatureOverrides());
 
   // Filter out non-overload placeholder types in the RHS.
   if (RHS->getType()->isNonOverloadPlaceholderType()) {
-    ExprResult result = CheckPlaceholderExpr(RHS);
+    ExprResult result = SemaRef.CheckPlaceholderExpr(RHS);
     if (result.isInvalid()) return ExprError();
     RHS = result.get();
   }
@@ -1521,20 +1522,20 @@ ExprResult Sema::checkPseudoObjectAssignment(Scope *S, SourceLocation opcLoc,
   Expr *opaqueRef = LHS->IgnoreParens();
   if (ObjCPropertyRefExpr *refExpr
         = dyn_cast<ObjCPropertyRefExpr>(opaqueRef)) {
-    ObjCPropertyOpBuilder builder(*this, refExpr, IsSimpleAssign);
+    ObjCPropertyOpBuilder builder(SemaRef, refExpr, IsSimpleAssign);
     return builder.buildAssignmentOperation(S, opcLoc, opcode, LHS, RHS);
   } else if (ObjCSubscriptRefExpr *refExpr
              = dyn_cast<ObjCSubscriptRefExpr>(opaqueRef)) {
-    ObjCSubscriptOpBuilder builder(*this, refExpr, IsSimpleAssign);
+    ObjCSubscriptOpBuilder builder(SemaRef, refExpr, IsSimpleAssign);
     return builder.buildAssignmentOperation(S, opcLoc, opcode, LHS, RHS);
   } else if (MSPropertyRefExpr *refExpr
              = dyn_cast<MSPropertyRefExpr>(opaqueRef)) {
-      MSPropertyOpBuilder builder(*this, refExpr, IsSimpleAssign);
-      return builder.buildAssignmentOperation(S, opcLoc, opcode, LHS, RHS);
+    MSPropertyOpBuilder builder(SemaRef, refExpr, IsSimpleAssign);
+    return builder.buildAssignmentOperation(S, opcLoc, opcode, LHS, RHS);
   } else if (MSPropertySubscriptExpr *RefExpr
              = dyn_cast<MSPropertySubscriptExpr>(opaqueRef)) {
-      MSPropertyOpBuilder Builder(*this, RefExpr, IsSimpleAssign);
-      return Builder.buildAssignmentOperation(S, opcLoc, opcode, LHS, RHS);
+    MSPropertyOpBuilder Builder(SemaRef, RefExpr, IsSimpleAssign);
+    return Builder.buildAssignmentOperation(S, opcLoc, opcode, LHS, RHS);
   } else {
     llvm_unreachable("unknown pseudo-object kind!");
   }
@@ -1557,36 +1558,38 @@ static Expr *stripOpaqueValuesFromPseudoObjectRef(Sema &S, Expr *E) {
 /// This is a hack which should be removed when TreeTransform is
 /// capable of rebuilding a tree without stripping implicit
 /// operations.
-Expr *Sema::recreateSyntacticForm(PseudoObjectExpr *E) {
+Expr *SemaPseudoObject::recreateSyntacticForm(PseudoObjectExpr *E) {
   Expr *syntax = E->getSyntacticForm();
   if (UnaryOperator *uop = dyn_cast<UnaryOperator>(syntax)) {
-    Expr *op = stripOpaqueValuesFromPseudoObjectRef(*this, uop->getSubExpr());
-    return UnaryOperator::Create(Context, op, uop->getOpcode(), uop->getType(),
-                                 uop->getValueKind(), uop->getObjectKind(),
-                                 uop->getOperatorLoc(), uop->canOverflow(),
-                                 CurFPFeatureOverrides());
+    Expr *op = stripOpaqueValuesFromPseudoObjectRef(SemaRef, uop->getSubExpr());
+    return UnaryOperator::Create(
+        SemaRef.Context, op, uop->getOpcode(), uop->getType(),
+        uop->getValueKind(), uop->getObjectKind(), uop->getOperatorLoc(),
+        uop->canOverflow(), SemaRef.CurFPFeatureOverrides());
   } else if (CompoundAssignOperator *cop
                = dyn_cast<CompoundAssignOperator>(syntax)) {
-    Expr *lhs = stripOpaqueValuesFromPseudoObjectRef(*this, cop->getLHS());
+    Expr *lhs = stripOpaqueValuesFromPseudoObjectRef(SemaRef, cop->getLHS());
     Expr *rhs = cast<OpaqueValueExpr>(cop->getRHS())->getSourceExpr();
     return CompoundAssignOperator::Create(
-        Context, lhs, rhs, cop->getOpcode(), cop->getType(),
+        SemaRef.Context, lhs, rhs, cop->getOpcode(), cop->getType(),
         cop->getValueKind(), cop->getObjectKind(), cop->getOperatorLoc(),
-        CurFPFeatureOverrides(), cop->getComputationLHSType(),
+        SemaRef.CurFPFeatureOverrides(), cop->getComputationLHSType(),
         cop->getComputationResultType());
 
   } else if (BinaryOperator *bop = dyn_cast<BinaryOperator>(syntax)) {
-    Expr *lhs = stripOpaqueValuesFromPseudoObjectRef(*this, bop->getLHS());
+    Expr *lhs = stripOpaqueValuesFromPseudoObjectRef(SemaRef, bop->getLHS());
     Expr *rhs = cast<OpaqueValueExpr>(bop->getRHS())->getSourceExpr();
-    return BinaryOperator::Create(Context, lhs, rhs, bop->getOpcode(),
+    return BinaryOperator::Create(SemaRef.Context, lhs, rhs, bop->getOpcode(),
                                   bop->getType(), bop->getValueKind(),
                                   bop->getObjectKind(), bop->getOperatorLoc(),
-                                  CurFPFeatureOverrides());
+                                  SemaRef.CurFPFeatureOverrides());
 
   } else if (isa<CallExpr>(syntax)) {
     return syntax;
   } else {
     assert(syntax->hasPlaceholderType(BuiltinType::PseudoObject));
-    return stripOpaqueValuesFromPseudoObjectRef(*this, syntax);
+    return stripOpaqueValuesFromPseudoObjectRef(SemaRef, syntax);
   }
 }
+
+SemaPseudoObject::SemaPseudoObject(Sema &S) : SemaBase(S) {}
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 06ed0843ef504..a9e4987eda2b5 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -42,6 +42,7 @@
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenACC.h"
 #include "clang/Sema/SemaOpenMP.h"
+#include "clang/Sema/SemaPseudoObject.h"
 #include "clang/Sema/SemaSYCL.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -11882,7 +11883,7 @@ TreeTransform<Derived>::TransformPseudoObjectExpr(PseudoObjectExpr *E) {
   // better solution (rebuilding the semantic expressions and
   // rebinding OVEs as necessary) doesn't work; we'd need
   // TreeTransform to not strip away implicit conversions.
-  Expr *newSyntacticForm = SemaRef.recreateSyntacticForm(E);
+  Expr *newSyntacticForm = SemaRef.PseudoObject().recreateSyntacticForm(E);
   ExprResult result = getDerived().TransformExpr(newSyntacticForm);
   if (result.isInvalid()) return ExprError();
 
@@ -11890,7 +11891,7 @@ TreeTransform<Derived>::TransformPseudoObjectExpr(PseudoObjectExpr *E) {
   // expression must have been an lvalue-to-rvalue conversion which we
   // should reapply.
   if (result.get()->hasPlaceholderType(BuiltinType::PseudoObject))
-    result = SemaRef.checkPseudoObjectRValue(result.get());
+    result = SemaRef.PseudoObject().checkRValue(result.get());
 
   return result;
 }
@@ -16186,8 +16187,8 @@ ExprResult TreeTransform<Derived>::RebuildCXXOperatorCallExpr(
   if (First->getObjectKind() == OK_ObjCProperty) {
     BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(Op);
     if (BinaryOperator::isAssignmentOp(Opc))
-      return SemaRef.checkPseudoObjectAssignment(/*Scope=*/nullptr, OpLoc, Opc,
-                                                 First, Second);
+      return SemaRef.PseudoObject().checkAssignment(/*Scope=*/nullptr, OpLoc,
+                                                    Opc, First, Second);
     ExprResult Result = SemaRef.CheckPlaceholderExpr(First);
     if (Result.isInvalid())
       return ExprError();

>From 45293b5edb7f320bc1b14b6ce8ac90ed111baa53 Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq at debian.org>
Date: Tue, 21 May 2024 20:14:46 +0800
Subject: [PATCH 06/27] MIPS/Clang: handleTargetFeatures, add +fp64 if +msa and
 no other +-fp (#92728)

Commit: d59bc6b5c75384aa0b1e78cc85e17e8acaccebaf
Clang/MIPS: Add +fp64 if MSA and no explicit -mfp option (#91949)
added +fp64 for `clang`, while not for `clang -cc1`. So

   clang -cc1 -triple=mips -target-feature +msa -S

will emit an asm source file without ".module fp=64".
---
 clang/lib/Basic/Targets/Mips.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h
index f76c6ece8bf48..b6f110249fa78 100644
--- a/clang/lib/Basic/Targets/Mips.h
+++ b/clang/lib/Basic/Targets/Mips.h
@@ -324,6 +324,7 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
     FPMode = getDefaultFPMode();
     bool OddSpregGiven = false;
     bool StrictAlign = false;
+    bool FpGiven = false;
 
     for (const auto &Feature : Features) {
       if (Feature == "+single-float")
@@ -348,13 +349,16 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
         HasMSA = true;
       else if (Feature == "+nomadd4")
         DisableMadd4 = true;
-      else if (Feature == "+fp64")
+      else if (Feature == "+fp64") {
         FPMode = FP64;
-      else if (Feature == "-fp64")
+        FpGiven = true;
+      } else if (Feature == "-fp64") {
         FPMode = FP32;
-      else if (Feature == "+fpxx")
+        FpGiven = true;
+      } else if (Feature == "+fpxx") {
         FPMode = FPXX;
-      else if (Feature == "+nan2008")
+        FpGiven = true;
+      } else if (Feature == "+nan2008")
         IsNan2008 = true;
       else if (Feature == "-nan2008")
         IsNan2008 = false;
@@ -381,6 +385,11 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
     if (StrictAlign)
       HasUnalignedAccess = false;
 
+    if (HasMSA && !FpGiven) {
+      FPMode = FP64;
+      Features.push_back("+fp64");
+    }
+
     setDataLayout();
 
     return true;

>From f3aaaafe50697a1d9985836adb0b167aab05047c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 21 May 2024 13:33:20 +0100
Subject: [PATCH 07/27] [AMDGPU] Remove #if 0 code for fences in
 SIInsertWaitcnts (#92902)

We insert required waits for fences in SIMemoryLegalizer.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 53 ---------------------
 1 file changed, 53 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5577ce9eb1282..4799b4cb20892 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1673,59 +1673,6 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
             AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) {
     Wait.LoadCnt = 0;
   }
-#if 0 // TODO: the following blocks of logic when we have fence.
-  else if (MI.getOpcode() == SC_FENCE) {
-    const unsigned int group_size =
-      context->shader_info->GetMaxThreadGroupSize();
-    // group_size == 0 means thread group size is unknown at compile time
-    const bool group_is_multi_wave =
-      (group_size == 0 || group_size > target_info->GetWaveFrontSize());
-    const bool fence_is_global = !((SCInstInternalMisc*)Inst)->IsGroupFence();
-
-    for (unsigned int i = 0; i < Inst->NumSrcOperands(); i++) {
-      SCRegType src_type = Inst->GetSrcType(i);
-      switch (src_type) {
-        case SCMEM_LDS:
-          if (group_is_multi_wave ||
-            context->OptFlagIsOn(OPT_R1100_LDSMEM_FENCE_CHICKEN_BIT)) {
-            EmitWaitcnt |= ScoreBrackets->updateByWait(DS_CNT,
-                               ScoreBrackets->getScoreUB(DS_CNT));
-            // LDS may have to wait for VMcnt after buffer load to LDS
-            if (target_info->HasBufferLoadToLDS()) {
-              EmitWaitcnt |= ScoreBrackets->updateByWait(LOAD_CNT,
-                                 ScoreBrackets->getScoreUB(LOAD_CNT));
-            }
-          }
-          break;
-
-        case SCMEM_GDS:
-          if (group_is_multi_wave || fence_is_global) {
-            EmitWaitcnt |= ScoreBrackets->updateByWait(EXP_CNT,
-              ScoreBrackets->getScoreUB(EXP_CNT));
-            EmitWaitcnt |= ScoreBrackets->updateByWait(DS_CNT,
-              ScoreBrackets->getScoreUB(DS_CNT));
-          }
-          break;
-
-        case SCMEM_UAV:
-        case SCMEM_TFBUF:
-        case SCMEM_RING:
-        case SCMEM_SCATTER:
-          if (group_is_multi_wave || fence_is_global) {
-            EmitWaitcnt |= ScoreBrackets->updateByWait(EXP_CNT,
-              ScoreBrackets->getScoreUB(EXP_CNT));
-            EmitWaitcnt |= ScoreBrackets->updateByWait(LOAD_CNT,
-              ScoreBrackets->getScoreUB(LOAD_CNT));
-          }
-          break;
-
-        case SCMEM_SCRATCH:
-        default:
-          break;
-      }
-    }
-  }
-#endif
 
   // Export & GDS instructions do not read the EXEC mask until after the export
   // is granted (which can occur well after the instruction is issued).

>From 4e86b0006b639f10df108a885a54ff0eddb40217 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 21 May 2024 13:33:49 +0100
Subject: [PATCH 08/27] [AMDGPU] Remove #if 0 code for buffer stores in
 SIInsertWaitcnts (#92903)

---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 4799b4cb20892..0a50973e939b0 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -900,18 +900,6 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
         }
       }
     }
-#if 0 // TODO: check if this is handled by MUBUF code above.
-  } else if (Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORD ||
-       Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX2 ||
-       Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) {
-    MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data);
-    unsigned OpNo;//TODO: find the OpNo for this operand;
-    RegInterval Interval = getRegInterval(&Inst, MRI, TRI, OpNo);
-    for (int RegNo = Interval.first; RegNo < Interval.second;
-    ++RegNo) {
-      setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore);
-    }
-#endif
   } else /* LGKM_CNT || EXP_CNT || VS_CNT || NUM_INST_CNTS */ {
     // Match the score to the destination registers.
     for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {

>From 2117136b2b78ef3b83202909ffaf351598da8bd5 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Tue, 21 May 2024 15:41:10 +0300
Subject: [PATCH 09/27] [clangd] Fix bad directory path in
 `infinite-instantiations.test`

Addresses buildbot failure https://lab.llvm.org/buildbot/#/builders/123/builds/26913 caused by #92888
---
 clang-tools-extra/clangd/test/infinite-instantiation.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/test/infinite-instantiation.test b/clang-tools-extra/clangd/test/infinite-instantiation.test
index 98260c075d240..da1a294142429 100644
--- a/clang-tools-extra/clangd/test/infinite-instantiation.test
+++ b/clang-tools-extra/clangd/test/infinite-instantiation.test
@@ -1,5 +1,5 @@
 // RUN: rm -rf %t.dir && mkdir -p %t.dir
-// RUN: echo '[{"directory": "%/t.dir", "command": "clang -ftemplate-depth=100 -x c++ %s", "file": "%s"}]' > %t.dir/compile_commands.json
+// RUN: echo '[{"directory": "%t.dir", "command": "clang -ftemplate-depth=100 -x c++ %s", "file": "%s"}]' > %t.dir/compile_commands.json
 // RUN: not clangd --compile-commands-dir=%t.dir -check=%s 2>&1 | FileCheck -strict-whitespace %s
 
 // CHECK: [template_recursion_depth_exceeded]

>From 558f3ea4aef718f953d3f38a45363d8a91988292 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 21 May 2024 13:51:42 +0100
Subject: [PATCH 10/27] [AMDGPU] Remove #if 0 code for indexed resources in
 SIInsertWaitcnts (#92905)

I do not understand what optimization this was supposed to implement.
It has never been enabled. I suspect it no longer applies to GCN/RDNA
architectures.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 0a50973e939b0..230443313d72a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2244,17 +2244,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
 
     updateEventWaitcntAfter(Inst, &ScoreBrackets);
 
-#if 0 // TODO: implement resource type check controlled by options with ub = LB.
-    // If this instruction generates a S_SETVSKIP because it is an
-    // indexed resource, and we are on Tahiti, then it will also force
-    // an S_WAITCNT vmcnt(0)
-    if (RequireCheckResourceType(Inst, context)) {
-      // Force the score to as if an S_WAITCNT vmcnt(0) is emitted.
-      ScoreBrackets->setScoreLB(LOAD_CNT,
-      ScoreBrackets->getScoreUB(LOAD_CNT));
-    }
-#endif
-
     if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) {
       AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(
           Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst));

>From f78b1a40864470f7eb30dfda44a6ea8e845346a5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 14:56:22 +0200
Subject: [PATCH 11/27] [InstCombine] Add test for #92887 (NFC)

---
 llvm/test/Transforms/InstCombine/vec_shuffle.ll | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 919e30f672e44..839279d7420ba 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2330,3 +2330,19 @@ define <4 x i16> @blend_elements_from_load(ptr align 8 %_0) {
   %rv = shufflevector <3 x i16> <i16 0, i16 undef, i16 undef>, <3 x i16> %load, <4 x i32> <i32 0, i32 1, i32 3, i32 5>
   ret <4 x i16> %rv
 }
+
+; FIXME: This is a miscompile.
+define i16 @pr92887(<2 x i16> %v) {
+; CHECK-LABEL: @pr92887(
+; CHECK-NEXT:    ret i16 poison
+;
+  %v0 = extractelement <2 x i16> %v, i64 0
+  %v0lo = and i16 %v0, 1
+  %v1 = extractelement <2 x i16> %v, i64 1
+  %v1lo = and i16 %v1, 1
+  %ins1 = insertelement <4 x i16> poison, i16 %v0lo, i64 0
+  %ins2 = insertelement <4 x i16> %ins1, i16 %v1lo, i64 1
+  %shuf = shufflevector <4 x i16> %ins2, <4 x i16> <i16 poison, i16 poison, i16 undef, i16 undef>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  %extract = extractelement <4 x i16> %shuf, i32 2
+  ret i16 %extract
+}

>From 263224e4481b0850539a93a272184aac4abe86d4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 14:57:19 +0200
Subject: [PATCH 12/27] [InstCombine] Require poison operand in
 canEvaluateShuffled transform

This transform works on single-source shuffles, which require that
the second operand is poison, not undef. Otherwise we may convert
undef to poison.

Fixes https://github.com/llvm/llvm-project/issues/92887.
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2 +-
 llvm/test/Transforms/InstCombine/vec_shuffle.ll          | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 99f1f8eb34bb5..745ccbfe9dc74 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2901,7 +2901,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   if (Instruction *I = foldIdentityPaddedShuffles(SVI))
     return I;
 
-  if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
+  if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
     Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
     return replaceInstUsesWith(SVI, V);
   }
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 839279d7420ba..d36ada7756af3 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2331,10 +2331,9 @@ define <4 x i16> @blend_elements_from_load(ptr align 8 %_0) {
   ret <4 x i16> %rv
 }
 
-; FIXME: This is a miscompile.
 define i16 @pr92887(<2 x i16> %v) {
 ; CHECK-LABEL: @pr92887(
-; CHECK-NEXT:    ret i16 poison
+; CHECK-NEXT:    ret i16 undef
 ;
   %v0 = extractelement <2 x i16> %v, i64 0
   %v0lo = and i16 %v0, 1

>From c1b5b7c19b76f8d153f7ae9350d217b74888ed93 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Tue, 21 May 2024 08:19:54 -0500
Subject: [PATCH 13/27] [flang][Lower] Emit exiting branches from within
 constructs (#92455)

When lowering IfConstruct, CaseConstruct, and SelectTypeConstruct, emit
branches that exit the construct in each block that is still
unterminated after the FIR has been generated in it.

The same thing may be needed for SelectRankConstruct, once it's
supported.

This eliminates the need for inserting branches in `genFIR(Evaluation)`.

Follow-up to PR https://github.com/llvm/llvm-project/pull/91614.
---
 flang/lib/Lower/Bridge.cpp                    | 88 ++++++++++++++-----
 flang/test/Lower/branching-directive.f90      | 77 ++++++++++++++--
 .../test/Lower/unstructured-control-flow.f90  | 31 +++++++
 3 files changed, 167 insertions(+), 29 deletions(-)
 create mode 100644 flang/test/Lower/unstructured-control-flow.f90

diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 4a29c6b8fae78..4e50de3e7ee9c 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1302,6 +1302,43 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     genBranch(targetEval.block);
   }
 
+  /// A construct contains nested evaluations. Some of these evaluations
+  /// may start a new basic block, others will add code to an existing
+  /// block.
+  /// Collect the list of nested evaluations that are last in their block,
+  /// organize them into two sets:
+  /// 1. Exiting evaluations: they may need a branch exiting from their
+  ///    parent construct,
+  /// 2. Fall-through evaluations: they will continue to the following
+  ///    evaluation. They may still need a branch, but they do not exit
+  ///    the construct. They appear in cases where the following evaluation
+  ///    is a target of some branch.
+  void collectFinalEvaluations(
+      Fortran::lower::pft::Evaluation &construct,
+      llvm::SmallVector<Fortran::lower::pft::Evaluation *> &exits,
+      llvm::SmallVector<Fortran::lower::pft::Evaluation *> &fallThroughs) {
+    Fortran::lower::pft::EvaluationList &nested =
+        construct.getNestedEvaluations();
+    if (nested.empty())
+      return;
+
+    Fortran::lower::pft::Evaluation *exit = construct.constructExit;
+    Fortran::lower::pft::Evaluation *previous = &nested.front();
+
+    for (auto it = ++nested.begin(), end = nested.end(); it != end;
+         previous = &*it++) {
+      if (it->block == nullptr)
+        continue;
+      // "*it" starts a new block, check what to do with "previous"
+      if (it->isIntermediateConstructStmt() && previous != exit)
+        exits.push_back(previous);
+      else if (previous->lexicalSuccessor && previous->lexicalSuccessor->block)
+        fallThroughs.push_back(previous);
+    }
+    if (previous != exit)
+      exits.push_back(previous);
+  }
+
   /// Generate a SelectOp or branch sequence that compares \p selector against
   /// values in \p valueList and targets corresponding labels in \p labelList.
   /// If no value matches the selector, branch to \p defaultEval.
@@ -2109,6 +2146,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     }
 
     // Unstructured branch sequence.
+    llvm::SmallVector<Fortran::lower::pft::Evaluation *> exits, fallThroughs;
+    collectFinalEvaluations(eval, exits, fallThroughs);
+
     for (Fortran::lower::pft::Evaluation &e : eval.getNestedEvaluations()) {
       auto genIfBranch = [&](mlir::Value cond) {
         if (e.lexicalSuccessor == e.controlSuccessor) // empty block -> exit
@@ -2129,6 +2169,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
         genIfBranch(genIfCondition(s));
       } else {
         genFIR(e);
+        if (blockIsUnterminated()) {
+          if (llvm::is_contained(exits, &e))
+            genConstructExitBranch(*eval.constructExit);
+          else if (llvm::is_contained(fallThroughs, &e))
+            genBranch(e.lexicalSuccessor->block);
+        }
       }
     }
   }
@@ -2137,11 +2183,21 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     Fortran::lower::pft::Evaluation &eval = getEval();
     Fortran::lower::StatementContext stmtCtx;
     pushActiveConstruct(eval, stmtCtx);
+
+    llvm::SmallVector<Fortran::lower::pft::Evaluation *> exits, fallThroughs;
+    collectFinalEvaluations(eval, exits, fallThroughs);
+
     for (Fortran::lower::pft::Evaluation &e : eval.getNestedEvaluations()) {
       if (e.getIf<Fortran::parser::EndSelectStmt>())
         maybeStartBlock(e.block);
       else
         genFIR(e);
+      if (blockIsUnterminated()) {
+        if (llvm::is_contained(exits, &e))
+          genConstructExitBranch(*eval.constructExit);
+        else if (llvm::is_contained(fallThroughs, &e))
+          genBranch(e.lexicalSuccessor->block);
+      }
     }
     popActiveConstruct();
   }
@@ -3007,6 +3063,10 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     }
 
     pushActiveConstruct(getEval(), stmtCtx);
+    llvm::SmallVector<Fortran::lower::pft::Evaluation *> exits, fallThroughs;
+    collectFinalEvaluations(getEval(), exits, fallThroughs);
+    Fortran::lower::pft::Evaluation &constructExit = *getEval().constructExit;
+
     for (Fortran::lower::pft::Evaluation &eval :
          getEval().getNestedEvaluations()) {
       setCurrentPosition(eval.position);
@@ -3203,6 +3263,12 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       } else {
         genFIR(eval);
       }
+      if (blockIsUnterminated()) {
+        if (llvm::is_contained(exits, &eval))
+          genConstructExitBranch(constructExit);
+        else if (llvm::is_contained(fallThroughs, &eval))
+          genBranch(eval.lexicalSuccessor->block);
+      }
     }
     popActiveConstruct();
   }
@@ -4535,28 +4601,6 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     setCurrentEval(eval);
     setCurrentPosition(eval.position);
     eval.visit([&](const auto &stmt) { genFIR(stmt); });
-
-    // Generate an end-of-block branch for several special cases. For
-    // constructs, this can be done for either the end construct statement,
-    // or for the construct itself, which will skip this code if the
-    // end statement was visited first and generated a branch.
-    Fortran::lower::pft::Evaluation *successor = [&]() {
-      if (eval.isConstruct() ||
-          (eval.isDirective() && eval.hasNestedEvaluations()))
-        return eval.getLastNestedEvaluation().lexicalSuccessor;
-      return eval.lexicalSuccessor;
-    }();
-
-    if (successor && blockIsUnterminated()) {
-      if (successor->isIntermediateConstructStmt() &&
-          successor->parentConstruct->lowerAsUnstructured())
-        // Exit from an intermediate unstructured IF or SELECT construct block.
-        genBranch(successor->parentConstruct->constructExit->block);
-      else if (unstructuredContext && eval.isConstructStmt() &&
-               successor == eval.controlSuccessor)
-        // Exit from a degenerate, empty construct block.
-        genBranch(eval.parentConstruct->constructExit->block);
-    }
   }
 
   /// Map mlir function block arguments to the corresponding Fortran dummy
diff --git a/flang/test/Lower/branching-directive.f90 b/flang/test/Lower/branching-directive.f90
index a0a147f1053a4..69270d7bcbe96 100644
--- a/flang/test/Lower/branching-directive.f90
+++ b/flang/test/Lower/branching-directive.f90
@@ -1,25 +1,88 @@
-!RUN: flang-new -fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
+!RUN: bbc -emit-hlfir -fopenacc -fopenmp -o - %s | FileCheck %s
 
 !https://github.com/llvm/llvm-project/issues/91526
 
+!CHECK-LABEL: func.func @_QPsimple1
 !CHECK:   cf.cond_br %{{[0-9]+}}, ^bb[[THEN:[0-9]+]], ^bb[[ELSE:[0-9]+]]
 !CHECK: ^bb[[THEN]]:
-!CHECK:   cf.br ^bb[[EXIT:[0-9]+]]
+!CHECK:   omp.parallel
+!CHECK:   cf.br ^bb[[ENDIF:[0-9]+]]
 !CHECK: ^bb[[ELSE]]:
 !CHECK:   fir.call @_FortranAStopStatement
 !CHECK:   fir.unreachable
-!CHECK: ^bb[[EXIT]]:
+!CHECK: ^bb[[ENDIF]]:
+!CHECK:   return
 
-subroutine simple(y)
+subroutine simple1(y)
   implicit none
   logical, intent(in) :: y
   integer :: i
   if (y) then
-!$omp parallel
+    !$omp parallel
     i = 1
-!$omp end parallel
+    !$omp end parallel
   else
     stop 1
   end if
-end subroutine simple
+end subroutine
+
+!CHECK-LABEL: func.func @_QPsimple2
+!CHECK:   cf.cond_br %{{[0-9]+}}, ^bb[[THEN:[0-9]+]], ^bb[[ELSE:[0-9]+]]
+!CHECK: ^bb[[THEN]]:
+!CHECK:   omp.parallel
+!CHECK:   cf.br ^bb[[ENDIF:[0-9]+]]
+!CHECK: ^bb[[ELSE]]:
+!CHECK:   fir.call @_FortranAStopStatement
+!CHECK:   fir.unreachable
+!CHECK: ^bb[[ENDIF]]:
+!CHECK:   fir.call @_FortranAioOutputReal64
+!CHECK:   return
+subroutine simple2(x, yn)
+  implicit none
+  logical, intent(in) :: yn
+  integer, intent(in) :: x
+  integer :: i
+  real(8) :: E
+  E = 0d0
+
+  if (yn) then
+     !$omp parallel do private(i) reduction(+:E)
+     do i = 1, x
+        E = E + i
+     end do
+     !$omp end parallel do
+  else
+     stop 1
+  end if
+  print *, E
+end subroutine
+
+!CHECK-LABEL: func.func @_QPacccase
+!CHECK: fir.select_case %{{[0-9]+}} : i32 [{{.*}}, ^bb[[CASE1:[0-9]+]], {{.*}}, ^bb[[CASE2:[0-9]+]], {{.*}}, ^bb[[CASE3:[0-9]+]]]
+!CHECK: ^bb[[CASE1]]:
+!CHECK:   acc.serial
+!CHECK:   cf.br ^bb[[EXIT:[0-9]+]]
+!CHECK: ^bb[[CASE2]]:
+!CHECK:   fir.call @_FortranAioOutputAscii
+!CHECK:   cf.br ^bb[[EXIT]]
+!CHECK: ^bb[[CASE3]]:
+!CHECK:   fir.call @_FortranAioOutputAscii
+!CHECK:   cf.br ^bb[[EXIT]]
+!CHECK: ^bb[[EXIT]]:
+!CHECK:   return
+subroutine acccase(var)
+  integer :: var
+  integer :: res(10)
+  select case (var)
+    case (1)
+      print *, "case 1"
+      !$acc serial
+      res(1) = 1
+      !$acc end serial
+    case (2)
+      print *, "case 2"
+    case default
+      print *, "case default"
+  end select
+end subroutine
 
diff --git a/flang/test/Lower/unstructured-control-flow.f90 b/flang/test/Lower/unstructured-control-flow.f90
new file mode 100644
index 0000000000000..310293381e5f7
--- /dev/null
+++ b/flang/test/Lower/unstructured-control-flow.f90
@@ -0,0 +1,31 @@
+!RUN: bbc -emit-hlfir -o - %s | FileCheck %s
+
+!CHECK-LABEL: func.func @_QPunstructured1
+!CHECK:   fir.select %{{[0-9]+}} : i32 [{{.*}}, ^bb[[BLOCK3:[0-9]+]], {{.*}}, ^bb[[BLOCK4:[0-9]+]], {{.*}}, ^bb[[BLOCK5:[0-9]+]], {{.*}}, ^bb[[BLOCK1:[0-9]+]]]
+!CHECK: ^bb[[BLOCK1]]:
+!CHECK:   cf.cond_br %{{[0-9]+}}, ^bb[[BLOCK2:[0-9]+]], ^bb[[BLOCK4]]
+!CHECK: ^bb[[BLOCK2]]:
+!CHECK:   fir.if
+!CHECK:   cf.br ^bb[[BLOCK3]]
+!CHECK: ^bb[[BLOCK3]]:
+!CHECK:   %[[C10:[a-z0-9_]+]] = arith.constant 10 : i32
+!CHECK:   arith.addi {{.*}}, %[[C10]]
+!CHECK:   cf.br ^bb[[BLOCK4]]
+!CHECK: ^bb[[BLOCK4]]:
+!CHECK:   %[[C100:[a-z0-9_]+]] = arith.constant 100 : i32
+!CHECK:   arith.addi {{.*}}, %[[C100]]
+!CHECK:   cf.br ^bb[[BLOCK5]]
+!CHECK: ^bb[[BLOCK5]]:
+!CHECK:   %[[C1000:[a-z0-9_]+]] = arith.constant 1000 : i32
+!CHECK:   arith.addi {{.*}}, %[[C1000]]
+!CHECK:   return
+subroutine unstructured1(j, k)
+    goto (11, 22, 33) j-3  ! computed goto - an expression outside [1,3] is a nop
+    if (j == 2) goto 22
+    if (j == 1) goto 11
+    k = k + 1
+11  k = k + 10
+22  k = k + 100
+33  k = k + 1000
+end
+

>From 557a0be3af798cf55b1ef54fc98a84fa5a4ade42 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:16:55 +0200
Subject: [PATCH 14/27] [InstCombine] Add test for splat shuffle miscompile
 (NFC)

---
 .../Transforms/InstCombine/vec_shuffle.ll     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index d36ada7756af3..7428f7a93d64a 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2345,3 +2345,27 @@ define i16 @pr92887(<2 x i16> %v) {
   %extract = extractelement <4 x i16> %shuf, i32 2
   ret i16 %extract
 }
+
+; FIXME: This is a miscompile.
+define <2 x i32> @not_splat_shuffle1(i32 %x) {
+; CHECK-LABEL: @not_splat_shuffle1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[SHUF]]
+;
+  %vec = insertelement <2 x i32> undef, i32 %x, i32 1
+  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  ret <2 x i32> %shuf
+}
+
+; FIXME: This is a miscompile.
+define <2 x i32> @not_splat_shuffle2(i32 %x) {
+; CHECK-LABEL: @not_splat_shuffle2(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[SHUF]]
+;
+  %vec = insertelement <2 x i32> poison, i32 %x, i32 1
+  %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> <i32 1, i32 3>
+  ret <2 x i32> %shuf
+}

>From ecd269e8305330c185bbedbd5a59e887122333ba Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:18:17 +0200
Subject: [PATCH 15/27] [InstCombine] Check for poison instead of undef in
 splat shuffle fold

We can't canonicalize these to a splat shuffle, as doing so would
convert undef -> poison.
---
 .../Transforms/InstCombine/InstCombineVectorOps.cpp    | 10 +++++-----
 llvm/test/Transforms/InstCombine/vec_shuffle.ll        |  8 ++------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 745ccbfe9dc74..ac062fe55ce4d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2214,19 +2214,19 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
   uint64_t IndexC;
 
   // Match a shuffle that is a splat to a non-zero element.
-  if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
+  if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
                                        m_ConstantInt(IndexC)))) ||
-      !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
+      !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
     return nullptr;
 
   // Insert into element 0 of a poison vector.
   PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
   Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
 
-  // Splat from element 0. Any mask element that is undefined remains undefined.
+  // Splat from element 0. Any mask element that is poison remains poison.
   // For example:
-  // shuf (inselt undef, X, 2), _, <2,2,undef>
-  //   --> shuf (inselt undef, X, 0), poison, <0,0,undef>
+  // shuf (inselt poison, X, 2), _, <2,2,undef>
+  //   --> shuf (inselt poison, X, 0), poison, <0,0,undef>
   unsigned NumMaskElts =
       cast<FixedVectorType>(Shuf.getType())->getNumElements();
   SmallVector<int, 16> NewMask(NumMaskElts, 0);
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 7428f7a93d64a..7217e1ac4aa92 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2346,11 +2346,9 @@ define i16 @pr92887(<2 x i16> %v) {
   ret i16 %extract
 }
 
-; FIXME: This is a miscompile.
 define <2 x i32> @not_splat_shuffle1(i32 %x) {
 ; CHECK-LABEL: @not_splat_shuffle1(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    ret <2 x i32> [[SHUF]]
 ;
   %vec = insertelement <2 x i32> undef, i32 %x, i32 1
@@ -2358,11 +2356,9 @@ define <2 x i32> @not_splat_shuffle1(i32 %x) {
   ret <2 x i32> %shuf
 }
 
-; FIXME: This is a miscompile.
 define <2 x i32> @not_splat_shuffle2(i32 %x) {
 ; CHECK-LABEL: @not_splat_shuffle2(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[X:%.*]], i64 0
 ; CHECK-NEXT:    ret <2 x i32> [[SHUF]]
 ;
   %vec = insertelement <2 x i32> poison, i32 %x, i32 1

>From 2f1e2325cfd804cc84eafc63dc775995fd2f3a1b Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:33:25 +0200
Subject: [PATCH 16/27] [InstCombine] Use m_Poison instead of m_Undef in some
 places (NFCI)

I believe that in these cases other conditions already ensure that
the second operand is not used, this is mostly for clarity.
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ac062fe55ce4d..1556d61981d51 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2383,7 +2383,7 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
   Type *DestType = Shuf.getType();
   Value *X;
   if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
-      !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
+      !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
     return nullptr;
 
   // The source type must have the same number of elements as the shuffle,
@@ -2755,17 +2755,17 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
 // BinOp's operands are the result of a first element splat can be simplified to
 // splatting the first element of the result of the BinOp
 Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
-  if (!match(SVI.getOperand(1), m_Undef()) ||
+  if (!match(SVI.getOperand(1), m_Poison()) ||
       !match(SVI.getShuffleMask(), m_ZeroMask()) ||
       !SVI.getOperand(0)->hasOneUse())
     return nullptr;
 
   Value *Op0 = SVI.getOperand(0);
   Value *X, *Y;
-  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
+  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
                           m_Value(Y))) &&
       !match(Op0, m_BinOp(m_Value(X),
-                          m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
+                          m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
     return nullptr;
   if (X->getType() != Y->getType())
     return nullptr;

>From b8e3d8021648478229697edeeb8539c99dbe5503 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:42:26 +0200
Subject: [PATCH 17/27] [InstCombine] Add test for incorrect shuffle of unop
 transform (NFC)

---
 llvm/test/Transforms/InstCombine/vec_shuffle.ll | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 7217e1ac4aa92..00c7a4ca32a02 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1280,6 +1280,18 @@ define <2 x float> @fneg(<2 x float> %x) {
   ret <2 x float> %r
 }
 
+; FIXME: This is a miscompile.
+define <2 x float> @fneg_not_single_source(<2 x float> %x) {
+; CHECK-LABEL: @fneg_not_single_source(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 0, i32 poison>
+; CHECK-NEXT:    [[SPLAT:%.*]] = fneg <2 x float> [[TMP1]]
+; CHECK-NEXT:    ret <2 x float> [[SPLAT]]
+;
+  %neg = fneg <2 x float> %x
+  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> <i32 0, i32 2>
+  ret <2 x float> %splat
+}
+
 define <2 x float> @fmul_splat_constant(<2 x float> %x) {
 ; CHECK-LABEL: @fmul_splat_constant(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], <float 4.200000e+01, float poison>

>From 8f1c984325bd679b2634a6173db69548da87ac71 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:43:50 +0200
Subject: [PATCH 18/27] [InstCombine] Check for poison instead of undef in
 shuffle of unop transform

Otherwise this may not actually be a single-source shuffle.
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2 +-
 llvm/test/Transforms/InstCombine/vec_shuffle.ll          | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1556d61981d51..ef30f28c3da7f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2462,7 +2462,7 @@ static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
 
   // Match 1-input (unary) shuffle.
   // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
-  if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+  if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Poison())) {
     Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
     if (IsFNeg)
       return UnaryOperator::CreateFNegFMF(NewShuf, S0);
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 00c7a4ca32a02..4d7e9d9067e75 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1280,12 +1280,11 @@ define <2 x float> @fneg(<2 x float> %x) {
   ret <2 x float> %r
 }
 
-; FIXME: This is a miscompile.
 define <2 x float> @fneg_not_single_source(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_not_single_source(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 0, i32 poison>
-; CHECK-NEXT:    [[SPLAT:%.*]] = fneg <2 x float> [[TMP1]]
-; CHECK-NEXT:    ret <2 x float> [[SPLAT]]
+; CHECK-NEXT:    [[NEG:%.*]] = fneg <2 x float> [[X:%.*]]
+; CHECK-NEXT:    [[SPLAT1:%.*]] = insertelement <2 x float> [[NEG]], float undef, i64 1
+; CHECK-NEXT:    ret <2 x float> [[SPLAT1]]
 ;
   %neg = fneg <2 x float> %x
   %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> <i32 0, i32 2>

>From a53e568b16db60eaeb4886232841c2a884f484fc Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <274595+qiongsiwu at users.noreply.github.com>
Date: Tue, 21 May 2024 09:45:01 -0400
Subject: [PATCH 19/27] [AIX] Set memrchr to unavailable (#92588)

`memrchr` is not available on AIX. This patch turns it off so the
optimizer will not generate it to cause link errors.
---
 llvm/lib/Analysis/TargetLibraryInfo.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 7ce42447b6308..592caf2d0e23a 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -882,6 +882,9 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_vec_free);
   }
 
+  if (T.isOSAIX())
+    TLI.setUnavailable(LibFunc_memrchr);
+
   TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T);
 }
 

>From fbc798e4426b322ed0e03019c20d929f2eaade22 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:47:46 +0200
Subject: [PATCH 20/27] [InstCombine] Use m_Poison instead of m_Undef (NFCI)

In this case, isIdentityWithExtract() should already ensure that
this is a single-source shuffle. This just makes things more
explicit.
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ef30f28c3da7f..9dcd9ef07d74f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2549,7 +2549,7 @@ static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
 /// Try to fold an extract subvector operation.
 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
   Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
-  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
+  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
     return nullptr;
 
   // Check if we are extracting all bits of an inserted scalar:
@@ -2578,10 +2578,10 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
   // not allow arbitrary shuffle mask creation as a target-independent transform
   // (because we can't guarantee that will lower efficiently).
   //
-  // If the extracting shuffle has an undef mask element, it transfers to the
+  // If the extracting shuffle has an poison mask element, it transfers to the
   // new shuffle mask. Otherwise, copy the original mask element. Example:
-  //   shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
-  //   shuf X, Y, <C0, undef, C2, undef>
+  //   shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
+  //   shuf X, Y, <C0, poison, C2, poison>
   unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
   SmallVector<int, 16> NewMask(NumElts);
   assert(NumElts < Mask.size() &&

>From a15b685c2d868eaf408d05baa50baa3c9f5cc740 Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane at nvidia.com>
Date: Tue, 21 May 2024 06:51:25 -0700
Subject: [PATCH 21/27] [OpenACC] Implement 'reduction' sema for compute
 constructs (#92808)

'reduction' has a few restrictions over normal 'var-list' clauses:

1- On parallel, a num_gangs can only have 1 argument when combined with
reduction. These two aren't able to be combined on any other of the
compute constructs however.

2- The vars all must be 'numerical data types' types of some sort, or a
'composite of numerical data types'. A list of types is given in the
standard as a minimum, so we choose 'isScalar', which covers all of
these types and keeps types that are actually numeric. Other compilers
don't seem to implement the 'composite of numerical data types', though
we do.

3- Because of the above restrictions, member-of-composite is not
allowed, so any access via a memberexpr is disallowed. Array-element and
sub-arrays (aka array sections) are both permitted, so long as they meet
the requirements of #2.

This patch implements all of these for compute constructs.
---
 clang/include/clang/AST/OpenACCClause.h       |  29 ++
 .../clang/Basic/DiagnosticSemaKinds.td        |  18 +-
 clang/include/clang/Basic/OpenACCClauses.def  |   1 +
 clang/include/clang/Basic/OpenACCKinds.h      |  36 +++
 clang/include/clang/Parse/Parser.h            |   4 +-
 clang/include/clang/Sema/SemaOpenACC.h        |  29 +-
 clang/lib/AST/OpenACCClause.cpp               |  20 +-
 clang/lib/AST/StmtProfile.cpp                 |   6 +
 clang/lib/AST/TextNodeDumper.cpp              |   4 +
 clang/lib/Parse/ParseOpenACC.cpp              |  30 ++-
 clang/lib/Sema/SemaOpenACC.cpp                | 154 ++++++++++-
 clang/lib/Sema/TreeTransform.h                |  21 +-
 clang/lib/Serialization/ASTReader.cpp         |   8 +-
 clang/lib/Serialization/ASTWriter.cpp         |   8 +-
 .../ast-print-openacc-compute-construct.cpp   |  28 ++
 clang/test/ParserOpenACC/parse-clauses.c      |  26 +-
 .../compute-construct-attach-clause.c         |   2 +-
 .../compute-construct-clause-ast.cpp          | 248 ++++++++++++++++++
 .../compute-construct-copy-clause.c           |   8 +-
 .../compute-construct-copy-clause.cpp         |  16 +-
 .../compute-construct-copyin-clause.c         |  10 +-
 .../compute-construct-copyin-clause.cpp       |  16 +-
 .../compute-construct-copyout-clause.c        |  10 +-
 .../compute-construct-copyout-clause.cpp      |  16 +-
 .../compute-construct-create-clause.c         |  10 +-
 .../compute-construct-create-clause.cpp       |  16 +-
 .../compute-construct-device_type-clause.c    |   2 +-
 .../compute-construct-deviceptr-clause.c      |   2 +-
 .../compute-construct-firstprivate-clause.c   |   8 +-
 .../compute-construct-firstprivate-clause.cpp |  16 +-
 .../compute-construct-no_create-clause.c      |   8 +-
 .../compute-construct-no_create-clause.cpp    |  16 +-
 .../compute-construct-present-clause.c        |   8 +-
 .../compute-construct-present-clause.cpp      |  16 +-
 .../compute-construct-private-clause.c        |  10 +-
 .../compute-construct-private-clause.cpp      |  16 +-
 .../compute-construct-reduction-clause.c      | 107 ++++++++
 .../compute-construct-reduction-clause.cpp    | 175 ++++++++++++
 clang/tools/libclang/CIndex.cpp               |   4 +
 39 files changed, 1005 insertions(+), 157 deletions(-)
 create mode 100644 clang/test/SemaOpenACC/compute-construct-reduction-clause.c
 create mode 100644 clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp

diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 607a2b9d65367..28ff8c44bd256 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -677,6 +677,35 @@ class OpenACCCreateClause final
          ArrayRef<Expr *> VarList, SourceLocation EndLoc);
 };
 
+class OpenACCReductionClause final
+    : public OpenACCClauseWithVarList,
+      public llvm::TrailingObjects<OpenACCReductionClause, Expr *> {
+  OpenACCReductionOperator Op;
+
+  OpenACCReductionClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
+                         OpenACCReductionOperator Operator,
+                         ArrayRef<Expr *> VarList, SourceLocation EndLoc)
+      : OpenACCClauseWithVarList(OpenACCClauseKind::Reduction, BeginLoc,
+                                 LParenLoc, EndLoc),
+        Op(Operator) {
+    std::uninitialized_copy(VarList.begin(), VarList.end(),
+                            getTrailingObjects<Expr *>());
+    setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
+  }
+
+public:
+  static bool classof(const OpenACCClause *C) {
+    return C->getClauseKind() == OpenACCClauseKind::Reduction;
+  }
+
+  static OpenACCReductionClause *
+  Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
+         OpenACCReductionOperator Operator, ArrayRef<Expr *> VarList,
+         SourceLocation EndLoc);
+
+  OpenACCReductionOperator getReductionOp() const { return Op; }
+};
+
 template <class Impl> class OpenACCClauseVisitor {
   Impl &getDerived() { return static_cast<Impl &>(*this); }
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e3c65cba4886a..c7dea1d54d063 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12343,7 +12343,8 @@ def err_acc_num_gangs_num_args
             "provided}0">;
 def err_acc_not_a_var_ref
     : Error<"OpenACC variable is not a valid variable name, sub-array, array "
-            "element, or composite variable member">;
+            "element,%select{| member of a composite variable,}0 or composite "
+            "variable member">;
 def err_acc_typecheck_subarray_value
     : Error<"OpenACC sub-array subscripted value is not an array or pointer">;
 def err_acc_subarray_function_type
@@ -12374,5 +12375,18 @@ def note_acc_expected_pointer_var : Note<"expected variable of pointer type">;
 def err_acc_clause_after_device_type
     : Error<"OpenACC clause '%0' may not follow a '%1' clause in a "
             "compute construct">;
-
+def err_acc_reduction_num_gangs_conflict
+    : Error<
+          "OpenACC 'reduction' clause may not appear on a 'parallel' construct "
+          "with a 'num_gangs' clause with more than 1 argument, have %0">;
+def err_acc_reduction_type
+    : Error<"OpenACC 'reduction' variable must be of scalar type, sub-array, or a "
+            "composite of scalar types;%select{| sub-array base}1 type is %0">;
+def err_acc_reduction_composite_type
+    : Error<"OpenACC 'reduction' variable must be a composite of scalar types; "
+            "%1 %select{is not a class or struct|is incomplete|is not an "
+            "aggregate}0">;
+def err_acc_reduction_composite_member_type :Error<
+    "OpenACC 'reduction' composite variable must not have non-scalar field">;
+def note_acc_reduction_composite_member_loc : Note<"invalid field is here">;
 } // end of sema component.
diff --git a/clang/include/clang/Basic/OpenACCClauses.def b/clang/include/clang/Basic/OpenACCClauses.def
index 7ecc51799468c..3e464abaafd92 100644
--- a/clang/include/clang/Basic/OpenACCClauses.def
+++ b/clang/include/clang/Basic/OpenACCClauses.def
@@ -46,6 +46,7 @@ VISIT_CLAUSE(NumGangs)
 VISIT_CLAUSE(NumWorkers)
 VISIT_CLAUSE(Present)
 VISIT_CLAUSE(Private)
+VISIT_CLAUSE(Reduction)
 VISIT_CLAUSE(Self)
 VISIT_CLAUSE(VectorLength)
 VISIT_CLAUSE(Wait)
diff --git a/clang/include/clang/Basic/OpenACCKinds.h b/clang/include/clang/Basic/OpenACCKinds.h
index 0e38a04e7164b..7b9d619a8aec6 100644
--- a/clang/include/clang/Basic/OpenACCKinds.h
+++ b/clang/include/clang/Basic/OpenACCKinds.h
@@ -514,6 +514,42 @@ enum class OpenACCReductionOperator {
   /// Invalid Reduction Clause Kind.
   Invalid,
 };
+
+template <typename StreamTy>
+inline StreamTy &printOpenACCReductionOperator(StreamTy &Out,
+                                               OpenACCReductionOperator Op) {
+  switch (Op) {
+  case OpenACCReductionOperator::Addition:
+    return Out << "+";
+  case OpenACCReductionOperator::Multiplication:
+    return Out << "*";
+  case OpenACCReductionOperator::Max:
+    return Out << "max";
+  case OpenACCReductionOperator::Min:
+    return Out << "min";
+  case OpenACCReductionOperator::BitwiseAnd:
+    return Out << "&";
+  case OpenACCReductionOperator::BitwiseOr:
+    return Out << "|";
+  case OpenACCReductionOperator::BitwiseXOr:
+    return Out << "^";
+  case OpenACCReductionOperator::And:
+    return Out << "&&";
+  case OpenACCReductionOperator::Or:
+    return Out << "||";
+  case OpenACCReductionOperator::Invalid:
+    return Out << "<invalid>";
+  }
+  llvm_unreachable("Unknown reduction operator kind");
+}
+inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &Out,
+                                             OpenACCReductionOperator Op) {
+  return printOpenACCReductionOperator(Out, Op);
+}
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &Out,
+                                     OpenACCReductionOperator Op) {
+  return printOpenACCReductionOperator(Out, Op);
+}
 } // namespace clang
 
 #endif // LLVM_CLANG_BASIC_OPENACCKINDS_H
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 5f04664141d29..3c4ab649e3b4c 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3686,9 +3686,9 @@ class Parser : public CodeCompletionHandler {
 
   using OpenACCVarParseResult = std::pair<ExprResult, OpenACCParseCanContinue>;
   /// Parses a single variable in a variable list for OpenACC.
-  OpenACCVarParseResult ParseOpenACCVar();
+  OpenACCVarParseResult ParseOpenACCVar(OpenACCClauseKind CK);
   /// Parses the variable list for the variety of places that take a var-list.
-  llvm::SmallVector<Expr *> ParseOpenACCVarList();
+  llvm::SmallVector<Expr *> ParseOpenACCVarList(OpenACCClauseKind CK);
   /// Parses any parameters for an OpenACC Clause, including required/optional
   /// parens.
   OpenACCClauseParseResult
diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h
index f838fa97d33a2..6f69fa08939b8 100644
--- a/clang/include/clang/Sema/SemaOpenACC.h
+++ b/clang/include/clang/Sema/SemaOpenACC.h
@@ -66,9 +66,14 @@ class SemaOpenACC : public SemaBase {
     struct DeviceTypeDetails {
       SmallVector<DeviceTypeArgument> Archs;
     };
+    struct ReductionDetails {
+      OpenACCReductionOperator Op;
+      SmallVector<Expr *> VarList;
+    };
 
     std::variant<std::monostate, DefaultDetails, ConditionDetails,
-                 IntExprDetails, VarListDetails, WaitDetails, DeviceTypeDetails>
+                 IntExprDetails, VarListDetails, WaitDetails, DeviceTypeDetails,
+                 ReductionDetails>
         Details = std::monostate{};
 
   public:
@@ -170,6 +175,10 @@ class SemaOpenACC : public SemaBase {
       return const_cast<OpenACCParsedClause *>(this)->getIntExprs();
     }
 
+    OpenACCReductionOperator getReductionOp() const {
+      return std::get<ReductionDetails>(Details).Op;
+    }
+
     ArrayRef<Expr *> getVarList() {
       assert((ClauseKind == OpenACCClauseKind::Private ||
               ClauseKind == OpenACCClauseKind::NoCreate ||
@@ -188,8 +197,13 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::PresentOrCreate ||
               ClauseKind == OpenACCClauseKind::Attach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
+              ClauseKind == OpenACCClauseKind::Reduction ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
              "Parsed clause kind does not have a var-list");
+
+      if (ClauseKind == OpenACCClauseKind::Reduction)
+        return std::get<ReductionDetails>(Details).VarList;
+
       return std::get<VarListDetails>(Details).VarList;
     }
 
@@ -334,6 +348,13 @@ class SemaOpenACC : public SemaBase {
       Details = VarListDetails{std::move(VarList), IsReadOnly, IsZero};
     }
 
+    void setReductionDetails(OpenACCReductionOperator Op,
+                             llvm::SmallVector<Expr *> &&VarList) {
+      assert(ClauseKind == OpenACCClauseKind::Reduction &&
+             "reduction details only valid on reduction");
+      Details = ReductionDetails{Op, std::move(VarList)};
+    }
+
     void setWaitDetails(Expr *DevNum, SourceLocation QueuesLoc,
                         llvm::SmallVector<Expr *> &&IntExprs) {
       assert(ClauseKind == OpenACCClauseKind::Wait &&
@@ -394,7 +415,11 @@ class SemaOpenACC : public SemaBase {
 
   /// Called when encountering a 'var' for OpenACC, ensures it is actually a
   /// declaration reference to a variable of the correct type.
-  ExprResult ActOnVar(Expr *VarExpr);
+  ExprResult ActOnVar(OpenACCClauseKind CK, Expr *VarExpr);
+
+  /// Called while semantically analyzing the reduction clause, ensuring the var
+  /// is the correct kind of reference.
+  ExprResult CheckReductionVar(Expr *VarExpr);
 
   /// Called to check the 'var' type is a variable of pointer type, necessary
   /// for 'deviceptr' and 'attach' clauses. Returns true on success.
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index 8ff6dabcbc48e..cb2c7f98be75c 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -35,7 +35,7 @@ bool OpenACCClauseWithVarList::classof(const OpenACCClause *C) {
          OpenACCAttachClause::classof(C) || OpenACCNoCreateClause::classof(C) ||
          OpenACCPresentClause::classof(C) || OpenACCCopyClause::classof(C) ||
          OpenACCCopyInClause::classof(C) || OpenACCCopyOutClause::classof(C) ||
-         OpenACCCreateClause::classof(C);
+         OpenACCReductionClause::classof(C) || OpenACCCreateClause::classof(C);
 }
 bool OpenACCClauseWithCondition::classof(const OpenACCClause *C) {
   return OpenACCIfClause::classof(C) || OpenACCSelfClause::classof(C);
@@ -310,6 +310,16 @@ OpenACCDeviceTypeClause *OpenACCDeviceTypeClause::Create(
       OpenACCDeviceTypeClause(K, BeginLoc, LParenLoc, Archs, EndLoc);
 }
 
+OpenACCReductionClause *OpenACCReductionClause::Create(
+    const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
+    OpenACCReductionOperator Operator, ArrayRef<Expr *> VarList,
+    SourceLocation EndLoc) {
+  void *Mem = C.Allocate(
+      OpenACCReductionClause::totalSizeToAlloc<Expr *>(VarList.size()));
+  return new (Mem)
+      OpenACCReductionClause(BeginLoc, LParenLoc, Operator, VarList, EndLoc);
+}
+
 //===----------------------------------------------------------------------===//
 //  OpenACC clauses printing methods
 //===----------------------------------------------------------------------===//
@@ -445,6 +455,14 @@ void OpenACCClausePrinter::VisitCreateClause(const OpenACCCreateClause &C) {
   OS << ")";
 }
 
+void OpenACCClausePrinter::VisitReductionClause(
+    const OpenACCReductionClause &C) {
+  OS << "reduction(" << C.getReductionOp() << ": ";
+  llvm::interleaveComma(C.getVarList(), OS,
+                        [&](const Expr *E) { printExpr(E); });
+  OS << ")";
+}
+
 void OpenACCClausePrinter::VisitWaitClause(const OpenACCWaitClause &C) {
   OS << "wait";
   if (!C.getLParenLoc().isInvalid()) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index caab4ab0ef160..00b8c43af035c 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2588,6 +2588,12 @@ void OpenACCClauseProfiler::VisitWaitClause(const OpenACCWaitClause &Clause) {
 /// Nothing to do here, there are no sub-statements.
 void OpenACCClauseProfiler::VisitDeviceTypeClause(
     const OpenACCDeviceTypeClause &Clause) {}
+
+void OpenACCClauseProfiler::VisitReductionClause(
+    const OpenACCReductionClause &Clause) {
+  for (auto *E : Clause.getVarList())
+    Profiler.VisitStmt(E);
+}
 } // namespace
 
 void StmtProfiler::VisitOpenACCComputeConstruct(
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index efcd74717a4e2..4a1e94ffe283b 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -457,6 +457,10 @@ void TextNodeDumper::Visit(const OpenACCClause *C) {
           });
       OS << ")";
       break;
+    case OpenACCClauseKind::Reduction:
+      OS << " clause Operator: "
+         << cast<OpenACCReductionClause>(C)->getReductionOp();
+      break;
     default:
       // Nothing to do here.
       break;
diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp
index 5db3036b00030..e9c60f76165b6 100644
--- a/clang/lib/Parse/ParseOpenACC.cpp
+++ b/clang/lib/Parse/ParseOpenACC.cpp
@@ -920,7 +920,8 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
     case OpenACCClauseKind::PresentOrCopyIn: {
       bool IsReadOnly = tryParseAndConsumeSpecialTokenKind(
           *this, OpenACCSpecialTokenKind::ReadOnly, ClauseKind);
-      ParsedClause.setVarListDetails(ParseOpenACCVarList(), IsReadOnly,
+      ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind),
+                                     IsReadOnly,
                                      /*IsZero=*/false);
       break;
     }
@@ -932,16 +933,17 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
     case OpenACCClauseKind::PresentOrCopyOut: {
       bool IsZero = tryParseAndConsumeSpecialTokenKind(
           *this, OpenACCSpecialTokenKind::Zero, ClauseKind);
-      ParsedClause.setVarListDetails(ParseOpenACCVarList(),
+      ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind),
                                      /*IsReadOnly=*/false, IsZero);
       break;
     }
-    case OpenACCClauseKind::Reduction:
+    case OpenACCClauseKind::Reduction: {
       // If we're missing a clause-kind (or it is invalid), see if we can parse
       // the var-list anyway.
-      ParseReductionOperator(*this);
-      ParseOpenACCVarList();
+      OpenACCReductionOperator Op = ParseReductionOperator(*this);
+      ParsedClause.setReductionDetails(Op, ParseOpenACCVarList(ClauseKind));
       break;
+    }
     case OpenACCClauseKind::Self:
       // The 'self' clause is a var-list instead of a 'condition' in the case of
       // the 'update' clause, so we have to handle it here.  U se an assert to
@@ -955,11 +957,11 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
     case OpenACCClauseKind::Host:
     case OpenACCClauseKind::Link:
     case OpenACCClauseKind::UseDevice:
-      ParseOpenACCVarList();
+      ParseOpenACCVarList(ClauseKind);
       break;
     case OpenACCClauseKind::Attach:
     case OpenACCClauseKind::DevicePtr:
-      ParsedClause.setVarListDetails(ParseOpenACCVarList(),
+      ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind),
                                      /*IsReadOnly=*/false, /*IsZero=*/false);
       break;
     case OpenACCClauseKind::Copy:
@@ -969,7 +971,7 @@ Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams(
     case OpenACCClauseKind::NoCreate:
     case OpenACCClauseKind::Present:
     case OpenACCClauseKind::Private:
-      ParsedClause.setVarListDetails(ParseOpenACCVarList(),
+      ParsedClause.setVarListDetails(ParseOpenACCVarList(ClauseKind),
                                      /*IsReadOnly=*/false, /*IsZero=*/false);
       break;
     case OpenACCClauseKind::Collapse: {
@@ -1278,7 +1280,7 @@ ExprResult Parser::ParseOpenACCBindClauseArgument() {
 /// - an array element
 /// - a member of a composite variable
 /// - a common block name between slashes (fortran only)
-Parser::OpenACCVarParseResult Parser::ParseOpenACCVar() {
+Parser::OpenACCVarParseResult Parser::ParseOpenACCVar(OpenACCClauseKind CK) {
   OpenACCArraySectionRAII ArraySections(*this);
 
   ExprResult Res = ParseAssignmentExpression();
@@ -1289,15 +1291,15 @@ Parser::OpenACCVarParseResult Parser::ParseOpenACCVar() {
   if (!Res.isUsable())
     return {Res, OpenACCParseCanContinue::Can};
 
-  Res = getActions().OpenACC().ActOnVar(Res.get());
+  Res = getActions().OpenACC().ActOnVar(CK, Res.get());
 
   return {Res, OpenACCParseCanContinue::Can};
 }
 
-llvm::SmallVector<Expr *> Parser::ParseOpenACCVarList() {
+llvm::SmallVector<Expr *> Parser::ParseOpenACCVarList(OpenACCClauseKind CK) {
   llvm::SmallVector<Expr *> Vars;
 
-  auto [Res, CanContinue] = ParseOpenACCVar();
+  auto [Res, CanContinue] = ParseOpenACCVar(CK);
   if (Res.isUsable()) {
     Vars.push_back(Res.get());
   } else if (CanContinue == OpenACCParseCanContinue::Cannot) {
@@ -1308,7 +1310,7 @@ llvm::SmallVector<Expr *> Parser::ParseOpenACCVarList() {
   while (!getCurToken().isOneOf(tok::r_paren, tok::annot_pragma_openacc_end)) {
     ExpectAndConsume(tok::comma);
 
-    auto [Res, CanContinue] = ParseOpenACCVar();
+    auto [Res, CanContinue] = ParseOpenACCVar(CK);
 
     if (Res.isUsable()) {
       Vars.push_back(Res.get());
@@ -1342,7 +1344,7 @@ void Parser::ParseOpenACCCacheVarList() {
 
   // ParseOpenACCVarList should leave us before a r-paren, so no need to skip
   // anything here.
-  ParseOpenACCVarList();
+  ParseOpenACCVarList(OpenACCClauseKind::Invalid);
 }
 
 Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() {
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index f174b2fa63c6a..09d91b31cfe5f 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -233,6 +233,19 @@ bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind,
       return false;
     }
 
+  case OpenACCClauseKind::Reduction:
+    switch (DirectiveKind) {
+    case OpenACCDirectiveKind::Parallel:
+    case OpenACCDirectiveKind::Serial:
+    case OpenACCDirectiveKind::Loop:
+    case OpenACCDirectiveKind::ParallelLoop:
+    case OpenACCDirectiveKind::SerialLoop:
+    case OpenACCDirectiveKind::KernelsLoop:
+      return true;
+    default:
+      return false;
+    }
+
   default:
     // Do nothing so we can go to the 'unimplemented' diagnostic instead.
     return true;
@@ -281,7 +294,6 @@ bool checkValidAfterDeviceType(
     return true;
   }
 }
-
 } // namespace
 
 SemaOpenACC::SemaOpenACC(Sema &S) : SemaBase(S) {}
@@ -426,6 +438,22 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
           << /*NoArgs=*/1 << Clause.getDirectiveKind() << MaxArgs
           << Clause.getIntExprs().size();
 
+    // OpenACC 3.3 Section 2.5.4:
+    // A reduction clause may not appear on a parallel construct with a
+    // num_gangs clause that has more than one argument.
+    if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel &&
+        Clause.getIntExprs().size() > 1) {
+      auto *Parallel =
+          llvm::find_if(ExistingClauses, llvm::IsaPred<OpenACCReductionClause>);
+
+      if (Parallel != ExistingClauses.end()) {
+        Diag(Clause.getBeginLoc(), diag::err_acc_reduction_num_gangs_conflict)
+            << Clause.getIntExprs().size();
+        Diag((*Parallel)->getBeginLoc(), diag::note_acc_previous_clause_here);
+        return nullptr;
+      }
+    }
+
     // Create the AST node for the clause even if the number of expressions is
     // incorrect.
     return OpenACCNumGangsClause::Create(
@@ -706,6 +734,46 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
         Clause.getLParenLoc(), Clause.getDeviceTypeArchitectures(),
         Clause.getEndLoc());
   }
+  case OpenACCClauseKind::Reduction: {
+    // Restrictions only properly implemented on 'compute' constructs, and
+    // 'compute' constructs are the only construct that can do anything with
+    // this yet, so skip/treat as unimplemented in this case.
+    if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()))
+      break;
+
+    // OpenACC 3.3 Section 2.5.4:
+    // A reduction clause may not appear on a parallel construct with a
+    // num_gangs clause that has more than one argument.
+    if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Parallel) {
+      auto NumGangsClauses = llvm::make_filter_range(
+          ExistingClauses, llvm::IsaPred<OpenACCNumGangsClause>);
+
+      for (auto *NGC : NumGangsClauses) {
+        unsigned NumExprs =
+            cast<OpenACCNumGangsClause>(NGC)->getIntExprs().size();
+
+        if (NumExprs > 1) {
+          Diag(Clause.getBeginLoc(), diag::err_acc_reduction_num_gangs_conflict)
+              << NumExprs;
+          Diag(NGC->getBeginLoc(), diag::note_acc_previous_clause_here);
+          return nullptr;
+        }
+      }
+    }
+
+    SmallVector<Expr *> ValidVars;
+
+    for (Expr *Var : Clause.getVarList()) {
+      ExprResult Res = CheckReductionVar(Var);
+
+      if (Res.isUsable())
+        ValidVars.push_back(Res.get());
+    }
+
+    return OpenACCReductionClause::Create(
+        getASTContext(), Clause.getBeginLoc(), Clause.getLParenLoc(),
+        Clause.getReductionOp(), ValidVars, Clause.getEndLoc());
+  }
   default:
     break;
   }
@@ -715,6 +783,66 @@ SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
   return nullptr;
 }
 
+/// OpenACC 3.3 section 2.5.15:
+/// At a mininmum, the supported data types include ... the numerical data types
+/// in C, C++, and Fortran.
+///
+/// If the reduction var is a composite variable, each
+/// member of the composite variable must be a supported datatype for the
+/// reduction operation.
+ExprResult SemaOpenACC::CheckReductionVar(Expr *VarExpr) {
+  VarExpr = VarExpr->IgnoreParenCasts();
+
+  auto TypeIsValid = [](QualType Ty) {
+    return Ty->isDependentType() || Ty->isScalarType();
+  };
+
+  if (isa<ArraySectionExpr>(VarExpr)) {
+    Expr *ASExpr = VarExpr;
+    QualType BaseTy = ArraySectionExpr::getBaseOriginalType(ASExpr);
+    QualType EltTy = getASTContext().getBaseElementType(BaseTy);
+
+    if (!TypeIsValid(EltTy)) {
+      Diag(VarExpr->getExprLoc(), diag::err_acc_reduction_type)
+          << EltTy << /*Sub array base type*/ 1;
+      return ExprError();
+    }
+  } else if (auto *RD = VarExpr->getType()->getAsRecordDecl()) {
+    if (!RD->isStruct() && !RD->isClass()) {
+      Diag(VarExpr->getExprLoc(), diag::err_acc_reduction_composite_type)
+          << /*not class or struct*/ 0 << VarExpr->getType();
+      return ExprError();
+    }
+
+    if (!RD->isCompleteDefinition()) {
+      Diag(VarExpr->getExprLoc(), diag::err_acc_reduction_composite_type)
+          << /*incomplete*/ 1 << VarExpr->getType();
+      return ExprError();
+    }
+    if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
+        CXXRD && !CXXRD->isAggregate()) {
+      Diag(VarExpr->getExprLoc(), diag::err_acc_reduction_composite_type)
+          << /*aggregate*/ 2 << VarExpr->getType();
+      return ExprError();
+    }
+
+    for (FieldDecl *FD : RD->fields()) {
+      if (!TypeIsValid(FD->getType())) {
+        Diag(VarExpr->getExprLoc(),
+             diag::err_acc_reduction_composite_member_type);
+        Diag(FD->getLocation(), diag::note_acc_reduction_composite_member_loc);
+        return ExprError();
+      }
+    }
+  } else if (!TypeIsValid(VarExpr->getType())) {
+    Diag(VarExpr->getExprLoc(), diag::err_acc_reduction_type)
+        << VarExpr->getType() << /*Sub array base type*/ 0;
+    return ExprError();
+  }
+
+  return VarExpr;
+}
+
 void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K,
                                  SourceLocation StartLoc) {
   switch (K) {
@@ -864,9 +992,7 @@ bool SemaOpenACC::CheckVarIsPointerType(OpenACCClauseKind ClauseKind,
   return false;
 }
 
-ExprResult SemaOpenACC::ActOnVar(Expr *VarExpr) {
-  // We still need to retain the array subscript/subarray exprs, so work on a
-  // copy.
+ExprResult SemaOpenACC::ActOnVar(OpenACCClauseKind CK, Expr *VarExpr) {
   Expr *CurVarExpr = VarExpr->IgnoreParenImpCasts();
 
   // Sub-arrays/subscript-exprs are fine as long as the base is a
@@ -882,14 +1008,19 @@ ExprResult SemaOpenACC::ActOnVar(Expr *VarExpr) {
   // References to a VarDecl are fine.
   if (const auto *DRE = dyn_cast<DeclRefExpr>(CurVarExpr)) {
     if (isa<VarDecl, NonTypeTemplateParmDecl>(
-            DRE->getDecl()->getCanonicalDecl()))
+            DRE->getFoundDecl()->getCanonicalDecl()))
       return VarExpr;
   }
 
+  // If CK is a Reduction, this special cases for OpenACC3.3 2.5.15: "A var in a
+  // reduction clause must be a scalar variable name, an aggregate variable
+  // name, an array element, or a subarray.
   // A MemberExpr that references a Field is valid.
-  if (const auto *ME = dyn_cast<MemberExpr>(CurVarExpr)) {
-    if (isa<FieldDecl>(ME->getMemberDecl()->getCanonicalDecl()))
-      return VarExpr;
+  if (CK != OpenACCClauseKind::Reduction) {
+    if (const auto *ME = dyn_cast<MemberExpr>(CurVarExpr)) {
+      if (isa<FieldDecl>(ME->getMemberDecl()->getCanonicalDecl()))
+        return VarExpr;
+    }
   }
 
   // Referring to 'this' is always OK.
@@ -898,7 +1029,9 @@ ExprResult SemaOpenACC::ActOnVar(Expr *VarExpr) {
 
   // Nothing really we can do here, as these are dependent.  So just return they
   // are valid.
-  if (isa<DependentScopeDeclRefExpr, CXXDependentScopeMemberExpr>(CurVarExpr))
+  if (isa<DependentScopeDeclRefExpr>(CurVarExpr) ||
+      (CK != OpenACCClauseKind::Reduction &&
+       isa<CXXDependentScopeMemberExpr>(CurVarExpr)))
     return VarExpr;
 
   // There isn't really anything we can do in the case of a recovery expr, so
@@ -906,7 +1039,8 @@ ExprResult SemaOpenACC::ActOnVar(Expr *VarExpr) {
   if (isa<RecoveryExpr>(CurVarExpr))
     return ExprError();
 
-  Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref);
+  Diag(VarExpr->getExprLoc(), diag::err_acc_not_a_var_ref)
+      << (CK != OpenACCClauseKind::Reduction);
   return ExprError();
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index a9e4987eda2b5..6b53c2490cc49 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -11126,7 +11126,8 @@ class OpenACCClauseTransform final
       if (!Res.isUsable())
         continue;
 
-      Res = Self.getSema().OpenACC().ActOnVar(Res.get());
+      Res = Self.getSema().OpenACC().ActOnVar(ParsedClause.getClauseKind(),
+                                              Res.get());
 
       if (Res.isUsable())
         InstantiatedVarList.push_back(Res.get());
@@ -11486,6 +11487,24 @@ void OpenACCClauseTransform<Derived>::VisitDeviceTypeClause(
       ParsedClause.getBeginLoc(), ParsedClause.getLParenLoc(),
       C.getArchitectures(), ParsedClause.getEndLoc());
 }
+
+template <typename Derived>
+void OpenACCClauseTransform<Derived>::VisitReductionClause(
+    const OpenACCReductionClause &C) {
+  SmallVector<Expr *> TransformedVars = VisitVarList(C.getVarList());
+  SmallVector<Expr *> ValidVars;
+
+  for (Expr *Var : TransformedVars) {
+    ExprResult Res = Self.getSema().OpenACC().CheckReductionVar(Var);
+    if (Res.isUsable())
+      ValidVars.push_back(Res.get());
+  }
+
+  NewClause = OpenACCReductionClause::Create(
+      Self.getSema().getASTContext(), ParsedClause.getBeginLoc(),
+      ParsedClause.getLParenLoc(), C.getReductionOp(), ValidVars,
+      ParsedClause.getEndLoc());
+}
 } // namespace
 template <typename Derived>
 OpenACCClause *TreeTransform<Derived>::TransformOpenACCClause(
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index f50f9569c0a5e..d7fc6697eaf74 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11921,6 +11921,13 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
     return OpenACCDeviceTypeClause::Create(getContext(), ClauseKind, BeginLoc,
                                            LParenLoc, Archs, EndLoc);
   }
+  case OpenACCClauseKind::Reduction: {
+    SourceLocation LParenLoc = readSourceLocation();
+    OpenACCReductionOperator Op = readEnum<OpenACCReductionOperator>();
+    llvm::SmallVector<Expr *> VarList = readOpenACCVarList();
+    return OpenACCReductionClause::Create(getContext(), BeginLoc, LParenLoc, Op,
+                                          VarList, EndLoc);
+  }
 
   case OpenACCClauseKind::Finalize:
   case OpenACCClauseKind::IfPresent:
@@ -11937,7 +11944,6 @@ OpenACCClause *ASTRecordReader::readOpenACCClause() {
   case OpenACCClauseKind::DeviceResident:
   case OpenACCClauseKind::Host:
   case OpenACCClauseKind::Link:
-  case OpenACCClauseKind::Reduction:
   case OpenACCClauseKind::Collapse:
   case OpenACCClauseKind::Bind:
   case OpenACCClauseKind::DeviceNum:
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 1d6d96932ba2c..00b0e48083217 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -7959,6 +7959,13 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
     }
     return;
   }
+  case OpenACCClauseKind::Reduction: {
+    const auto *RC = cast<OpenACCReductionClause>(C);
+    writeSourceLocation(RC->getLParenLoc());
+    writeEnum(RC->getReductionOp());
+    writeOpenACCVarList(RC);
+    return;
+  }
 
   case OpenACCClauseKind::Finalize:
   case OpenACCClauseKind::IfPresent:
@@ -7975,7 +7982,6 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) {
   case OpenACCClauseKind::DeviceResident:
   case OpenACCClauseKind::Host:
   case OpenACCClauseKind::Link:
-  case OpenACCClauseKind::Reduction:
   case OpenACCClauseKind::Collapse:
   case OpenACCClauseKind::Bind:
   case OpenACCClauseKind::DeviceNum:
diff --git a/clang/test/AST/ast-print-openacc-compute-construct.cpp b/clang/test/AST/ast-print-openacc-compute-construct.cpp
index 19965e7491414..fe580c86ac8ea 100644
--- a/clang/test/AST/ast-print-openacc-compute-construct.cpp
+++ b/clang/test/AST/ast-print-openacc-compute-construct.cpp
@@ -130,5 +130,33 @@ void foo() {
 //CHECK: #pragma acc parallel device_type(SomeStructImpl)
 #pragma acc parallel device_type (SomeStructImpl)
   while(true);
+
+//CHECK: #pragma acc parallel reduction(+: iPtr)
+#pragma acc parallel reduction(+: iPtr)
+  while(true);
+//CHECK: #pragma acc parallel reduction(*: i)
+#pragma acc parallel reduction(*: i)
+  while(true);
+//CHECK: #pragma acc parallel reduction(max: SomeB)
+#pragma acc parallel reduction(max: SomeB)
+  while(true);
+//CHECK: #pragma acc parallel reduction(min: iPtr)
+#pragma acc parallel reduction(min: iPtr)
+  while(true);
+//CHECK: #pragma acc parallel reduction(&: i)
+#pragma acc parallel reduction(&: i)
+  while(true);
+//CHECK: #pragma acc parallel reduction(|: SomeB)
+#pragma acc parallel reduction(|: SomeB)
+  while(true);
+//CHECK: #pragma acc parallel reduction(^: iPtr)
+#pragma acc parallel reduction(^: iPtr)
+  while(true);
+//CHECK: #pragma acc parallel reduction(&&: i)
+#pragma acc parallel reduction(&&: i)
+  while(true);
+//CHECK: #pragma acc parallel reduction(||: SomeB)
+#pragma acc parallel reduction(||: SomeB)
+  while(true);
 }
 
diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c
index 694f28b86ec9f..49e749feb2ec7 100644
--- a/clang/test/ParserOpenACC/parse-clauses.c
+++ b/clang/test/ParserOpenACC/parse-clauses.c
@@ -831,52 +831,38 @@ void ReductionClauseParsing() {
   // expected-error at +1{{expected '('}}
 #pragma acc serial reduction
   for(;;){}
-  // expected-error at +3{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
-  // expected-error at +2{{expected expression}}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
+  // expected-error at +2{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
+  // expected-error at +1{{expected expression}}
 #pragma acc serial reduction()
   for(;;){}
-  // expected-error at +2{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
+  // expected-error at +1{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
 #pragma acc serial reduction(Begin)
   for(;;){}
-  // expected-error at +2{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
+  // expected-error at +1{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
 #pragma acc serial reduction(Begin, End)
   for(;;){}
-  // expected-error at +2{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
+  // expected-error at +1{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}}
 #pragma acc serial reduction(Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(+:Begin)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(+:Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(*: Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(max : Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(min: Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(&: Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(|: Begin, End)
   for(;;){}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
 #pragma acc serial reduction(^: Begin, End)
   for(;;){}
-  // expected-warning at +2{{OpenACC clause 'seq' not yet implemented, clause ignored}}
-  // expected-warning at +1{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
+  // expected-warning at +1{{OpenACC clause 'seq' not yet implemented, clause ignored}}
 #pragma acc serial seq, reduction(&&: Begin, End)
   for(;;){}
-  // expected-warning at +2{{OpenACC clause 'reduction' not yet implemented, clause ignored}}
   // expected-warning at +1{{OpenACC clause 'seq' not yet implemented, clause ignored}}
 #pragma acc serial reduction(||: Begin, End), seq
   for(;;){}
diff --git a/clang/test/SemaOpenACC/compute-construct-attach-clause.c b/clang/test/SemaOpenACC/compute-construct-attach-clause.c
index de735308528ad..deca99f5bae47 100644
--- a/clang/test/SemaOpenACC/compute-construct-attach-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-attach-clause.c
@@ -16,7 +16,7 @@ void uses() {
 #pragma acc parallel attach(LocalInt)
   while (1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel attach(&LocalInt)
   while (1);
 
diff --git a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp
index 6d2efcf81eb6e..69f65f4083ae7 100644
--- a/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-clause-ast.cpp
@@ -40,6 +40,89 @@ void NormalFunc(int i, float f) {
   // CHECK-NEXT: WhileStmt
   // CHECK-NEXT: CXXBoolLiteralExpr
   // CHECK-NEXT: NullStmt
+
+#pragma acc parallel reduction(+: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: +
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(*: f)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: *
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc parallel reduction(max: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: max
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(min: f)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: min
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc parallel reduction(&: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: &
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(|: f)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: |
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+
+#pragma acc parallel reduction(^: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: ^
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(&&: f)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: &&
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'float' lvalue ParmVar{{.*}} 'f' 'float'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+
+#pragma acc parallel reduction(||: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: ||
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'int' lvalue ParmVar{{.*}} 'i' 'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
 }
 
 template<typename T>
@@ -154,6 +237,98 @@ void TemplFunc() {
   // CHECK-NEXT: CXXBoolLiteralExpr
   // CHECK-NEXT: NullStmt
 
+  T t;
+  // CHECK-NEXT: DeclStmt
+  // CHECK-NEXT: VarDecl{{.*}} t 'T'
+
+#pragma acc parallel reduction(+: t)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: +
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue Var{{.*}} 't' 'T'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(*: T::SomeFloat)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: *
+  // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue
+  // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  typename T::IntTy i;
+  // CHECK-NEXT: DeclStmt
+  // CHECK-NEXT: VarDecl{{.*}} i 'typename T::IntTy'
+
+#pragma acc parallel reduction(max: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: max
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'typename T::IntTy' lvalue Var{{.*}} 'i' 'typename T::IntTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(min: t)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: min
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue Var{{.*}} 't' 'T'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc parallel reduction(&: T::SomeFloat)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: &
+  // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue
+  // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(|: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: |
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'typename T::IntTy' lvalue Var{{.*}} 'i' 'typename T::IntTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc parallel reduction(^: t)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: ^
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue Var{{.*}} 't' 'T'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc serial reduction(&&: T::SomeFloat)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: &&
+  // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' lvalue
+  // CHECK-NEXT: NestedNameSpecifier TypeSpec 'T'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+#pragma acc parallel reduction(||: i)
+  while(true);
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: ||
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'typename T::IntTy' lvalue Var{{.*}} 'i' 'typename T::IntTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
   // Match the instantiation:
   // CHECK: FunctionDecl{{.*}}TemplFunc{{.*}}implicit_instantiation
   // CHECK-NEXT: TemplateArgument type 'InstTy'
@@ -262,6 +437,79 @@ void TemplFunc() {
   // CHECK-NEXT: CXXBoolLiteralExpr
   // CHECK-NEXT: NullStmt
 
+  // CHECK-NEXT: DeclStmt
+  // CHECK-NEXT: VarDecl{{.*}} t 'InstTy'
+  // CHECK-NEXT: CXXConstructExpr
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: +
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'InstTy' lvalue Var{{.*}} 't' 'InstTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: *
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float'
+  // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: DeclStmt
+  // CHECK-NEXT: VarDecl{{.*}} i 'typename InstTy::IntTy':'int'
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: max
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: min
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'InstTy' lvalue Var{{.*}} 't' 'InstTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: &
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float'
+  // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: |
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: ^
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'InstTy' lvalue Var{{.*}} 't' 'InstTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}serial
+  // CHECK-NEXT: reduction clause Operator: &&
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'const float' lvalue Var{{.*}} 'SomeFloat' 'const float'
+  // CHECK-NEXT: NestedNameSpecifier TypeSpec 'InstTy'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
+  // CHECK-NEXT: OpenACCComputeConstruct{{.*}}parallel
+  // CHECK-NEXT: reduction clause Operator: ||
+  // CHECK-NEXT: DeclRefExpr{{.*}} 'typename InstTy::IntTy':'int' lvalue Var{{.*}} 'i' 'typename InstTy::IntTy':'int'
+  // CHECK-NEXT: WhileStmt
+  // CHECK-NEXT: CXXBoolLiteralExpr
+  // CHECK-NEXT: NullStmt
+
 }
 
 struct BoolConversion{ operator bool() const;};
diff --git a/clang/test/SemaOpenACC/compute-construct-copy-clause.c b/clang/test/SemaOpenACC/compute-construct-copy-clause.c
index accbe43cea406..2b43480be8b4f 100644
--- a/clang/test/SemaOpenACC/compute-construct-copy-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-copy-clause.c
@@ -36,11 +36,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel copy(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy(+IntParam)
   while(1);
 
@@ -53,10 +53,10 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy((float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-copy-clause.cpp b/clang/test/SemaOpenACC/compute-construct-copy-clause.cpp
index 16e78a43026a9..2797927e6e56b 100644
--- a/clang/test/SemaOpenACC/compute-construct-copy-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-copy-clause.cpp
@@ -31,11 +31,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel copy(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy(+IntParam)
   while(1);
 
@@ -48,27 +48,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel copy(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copy(t, I)
   while(true);
 
@@ -93,7 +93,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel copy(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-copyin-clause.c b/clang/test/SemaOpenACC/compute-construct-copyin-clause.c
index 6f200b357f52b..5ea4db9e5fae9 100644
--- a/clang/test/SemaOpenACC/compute-construct-copyin-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-copyin-clause.c
@@ -38,11 +38,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel copyin(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(+IntParam)
   while(1);
 
@@ -55,14 +55,14 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin((float)ArrayParam[2])
   while(1);
   // expected-error at +2{{invalid tag 'invalid' on 'copyin' clause}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(invalid:(float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-copyin-clause.cpp b/clang/test/SemaOpenACC/compute-construct-copyin-clause.cpp
index 79275e701161b..74ce74a1368d1 100644
--- a/clang/test/SemaOpenACC/compute-construct-copyin-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-copyin-clause.cpp
@@ -31,11 +31,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel copyin(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(+IntParam)
   while(1);
 
@@ -48,27 +48,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel copyin(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyin(t, I)
   while(true);
 
@@ -93,7 +93,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel copyin(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-copyout-clause.c b/clang/test/SemaOpenACC/compute-construct-copyout-clause.c
index 38a50f8373e8d..a035ab3242e3a 100644
--- a/clang/test/SemaOpenACC/compute-construct-copyout-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-copyout-clause.c
@@ -38,11 +38,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel copyout(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(+IntParam)
   while(1);
 
@@ -55,14 +55,14 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout((float)ArrayParam[2])
   while(1);
   // expected-error at +2{{invalid tag 'invalid' on 'copyout' clause}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(invalid:(float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-copyout-clause.cpp b/clang/test/SemaOpenACC/compute-construct-copyout-clause.cpp
index 3d05a5670092e..c01dc1a39963b 100644
--- a/clang/test/SemaOpenACC/compute-construct-copyout-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-copyout-clause.cpp
@@ -31,11 +31,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel copyout(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(+IntParam)
   while(1);
 
@@ -48,27 +48,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel copyout(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel copyout(t, I)
   while(true);
 
@@ -93,7 +93,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel copyout(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-create-clause.c b/clang/test/SemaOpenACC/compute-construct-create-clause.c
index 9c94e3a1a4073..5cfa9b0c5cc3c 100644
--- a/clang/test/SemaOpenACC/compute-construct-create-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-create-clause.c
@@ -39,11 +39,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel create(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(+IntParam)
   while(1);
 
@@ -56,14 +56,14 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create((float)ArrayParam[2])
   while(1);
   // expected-error at +2{{invalid tag 'invalid' on 'create' clause}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(invalid:(float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-create-clause.cpp b/clang/test/SemaOpenACC/compute-construct-create-clause.cpp
index d0323620b8f70..3ed1e1e9f700d 100644
--- a/clang/test/SemaOpenACC/compute-construct-create-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-create-clause.cpp
@@ -31,11 +31,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel create(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(+IntParam)
   while(1);
 
@@ -48,27 +48,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel create(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel create(t, I)
   while(true);
 
@@ -93,7 +93,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel create(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-device_type-clause.c b/clang/test/SemaOpenACC/compute-construct-device_type-clause.c
index 15c9cf396c80c..bf2a00a0f7360 100644
--- a/clang/test/SemaOpenACC/compute-construct-device_type-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-device_type-clause.c
@@ -182,7 +182,7 @@ void uses() {
   while(1);
   // expected-error at +2{{OpenACC clause 'reduction' may not follow a 'device_type' clause in a compute construct}}
   // expected-note at +1{{previous clause is here}}
-#pragma acc kernels device_type(*) reduction(+:Var)
+#pragma acc serial device_type(*) reduction(+:Var)
   while(1);
   // expected-error at +2{{OpenACC clause 'collapse' may not follow a 'device_type' clause in a compute construct}}
   // expected-note at +1{{previous clause is here}}
diff --git a/clang/test/SemaOpenACC/compute-construct-deviceptr-clause.c b/clang/test/SemaOpenACC/compute-construct-deviceptr-clause.c
index e5d328eb0b28b..ae8269b9779a4 100644
--- a/clang/test/SemaOpenACC/compute-construct-deviceptr-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-deviceptr-clause.c
@@ -16,7 +16,7 @@ void uses() {
 #pragma acc parallel deviceptr(LocalInt)
   while (1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel deviceptr(&LocalInt)
   while (1);
 
diff --git a/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.c b/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.c
index 4e057bf32c2d6..eacda7bbbbba2 100644
--- a/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.c
@@ -29,11 +29,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel firstprivate(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate(+IntParam)
   while(1);
 
@@ -46,10 +46,10 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate((float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.cpp b/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.cpp
index 2fbb80f7b2fbd..161e4012c08d5 100644
--- a/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-firstprivate-clause.cpp
@@ -32,11 +32,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel firstprivate(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate(+IntParam)
   while(1);
 
@@ -49,27 +49,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel firstprivate((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel private(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(t, I)
   while(true);
 
@@ -94,7 +94,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel private(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-no_create-clause.c b/clang/test/SemaOpenACC/compute-construct-no_create-clause.c
index 07a60b73c34f8..4ff06eaf132b0 100644
--- a/clang/test/SemaOpenACC/compute-construct-no_create-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-no_create-clause.c
@@ -28,11 +28,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel no_create(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create(+IntParam)
   while(1);
 
@@ -45,10 +45,10 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create((float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-no_create-clause.cpp b/clang/test/SemaOpenACC/compute-construct-no_create-clause.cpp
index 3820d5e3999d5..fa84b1fbeda07 100644
--- a/clang/test/SemaOpenACC/compute-construct-no_create-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-no_create-clause.cpp
@@ -31,11 +31,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel no_create(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create(+IntParam)
   while(1);
 
@@ -48,27 +48,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel no_create(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel no_create(t, I)
   while(true);
 
@@ -93,7 +93,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel no_create(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-present-clause.c b/clang/test/SemaOpenACC/compute-construct-present-clause.c
index 99c4b1dcd19b4..1d50a6b1275b8 100644
--- a/clang/test/SemaOpenACC/compute-construct-present-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-present-clause.c
@@ -28,11 +28,11 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 #pragma acc parallel present(LocalComposite.ScalarMember, LocalComposite.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present(+IntParam)
   while(1);
 
@@ -45,10 +45,10 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present((float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-present-clause.cpp b/clang/test/SemaOpenACC/compute-construct-present-clause.cpp
index 62e481dea3e24..db230d0b1d9da 100644
--- a/clang/test/SemaOpenACC/compute-construct-present-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-present-clause.cpp
@@ -31,11 +31,11 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 #pragma acc parallel present(LocalComposite2.ScalarMember, LocalComposite2.ScalarMember)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present(1 + IntParam)
   while(1);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present(+IntParam)
   while(1);
 
@@ -48,27 +48,27 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present((float)ArrayParam[2])
   while(1);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present(+t)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel present(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel present(t, I)
   while(true);
 
@@ -93,7 +93,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel present(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-private-clause.c b/clang/test/SemaOpenACC/compute-construct-private-clause.c
index d2615c384cdb1..3e6dbaafbc6fa 100644
--- a/clang/test/SemaOpenACC/compute-construct-private-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-private-clause.c
@@ -89,13 +89,13 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
 
   // Invalid cases, arbitrary expressions.
   struct Incomplete *I;
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(*I)
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(GlobalInt + IntParam)
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(+GlobalInt)
   while(1);
 
@@ -128,10 +128,10 @@ void uses(int IntParam, short *PointerParam, float ArrayParam[5], Complete Compo
   while(1);
 
   // expected-error at +2{{OpenACC sub-array specified range [2:5] would be out of the range of the subscripted array size of 5}}
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private((float*)ArrayParam[2:5])
   while(1);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private((float)ArrayParam[2])
   while(1);
 }
diff --git a/clang/test/SemaOpenACC/compute-construct-private-clause.cpp b/clang/test/SemaOpenACC/compute-construct-private-clause.cpp
index a776b16f0feb2..fb9e89a21accb 100644
--- a/clang/test/SemaOpenACC/compute-construct-private-clause.cpp
+++ b/clang/test/SemaOpenACC/compute-construct-private-clause.cpp
@@ -64,34 +64,34 @@ void uses(int IntParam, char *PointerParam, float ArrayParam[5], Complete Compos
 
   // Invalid cases, arbitrary expressions.
   Incomplete *I;
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(*I)
   while(true);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(GlobalInt + IntParam)
   while(true);
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(+GlobalInt)
   while(true);
 }
 
 template<typename T, unsigned I, typename V>
 void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(+t)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(+I)
   while(true);
 
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#TEMPL_USES_INST{{in instantiation of}}
 #pragma acc parallel private(I)
   while(true);
 
-  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
 #pragma acc parallel private(t, I)
   while(true);
 
@@ -120,7 +120,7 @@ void TemplUses(T t, T (&arrayT)[I], V TemplComp) {
 template<unsigned I, auto &NTTP_REF>
 void NTTP() {
   // NTTP's are only valid if it is a reference to something.
-  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+  // expected-error at +2{{OpenACC variable is not a valid variable name, sub-array, array element, member of a composite variable, or composite variable member}}
   // expected-note@#NTTP_INST{{in instantiation of}}
 #pragma acc parallel private(I)
   while(true);
diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.c b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c
new file mode 100644
index 0000000000000..9c0debd345031
--- /dev/null
+++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.c
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 %s -fopenacc -verify
+
+struct CompositeOfScalars {
+  int I;
+  float F;
+  short J;
+  char C;
+  double D;
+  _Complex float CF;
+  _Complex double CD;
+};
+
+struct CompositeHasComposite {
+  int I;
+  float F;
+  short J;
+  char C;
+  double D;
+  _Complex float CF;
+  _Complex double CD;
+  struct CompositeOfScalars COS; // #COS_FIELD
+};
+
+void uses(unsigned Parm) {
+  float Var;
+  int IVar;
+
+#pragma acc parallel reduction(+:Parm)
+  while (1);
+#pragma acc serial reduction(+:Parm)
+  while (1);
+  // expected-error at +1{{OpenACC 'reduction' clause is not valid on 'kernels' directive}}
+#pragma acc kernels reduction(+:Parm)
+  while (1);
+
+  // On a 'parallel', 'num_gangs' cannot have >1 args. num_gangs not valid on
+  // 'serial', but 'reduction' not valid on 'kernels', other combos cannot be
+  // tested.
+#pragma acc parallel reduction(+:Parm) num_gangs(IVar)
+  while (1);
+#pragma acc parallel num_gangs(IVar) reduction(+:IVar)
+  while (1);
+
+  // expected-error at +2{{OpenACC 'reduction' clause may not appear on a 'parallel' construct with a 'num_gangs' clause with more than 1 argument, have 2}}
+  // expected-note at +1{{previous clause is here}}
+#pragma acc parallel reduction(+:Parm) num_gangs(Parm, IVar)
+  while (1);
+
+  // expected-error at +2{{OpenACC 'reduction' clause may not appear on a 'parallel' construct with a 'num_gangs' clause with more than 1 argument, have 2}}
+  // expected-note at +1{{previous clause is here}}
+#pragma acc parallel num_gangs(Parm, IVar) reduction(+:Var)
+  while (1);
+
+  struct CompositeOfScalars CoS;
+  struct CompositeOfScalars *CoSPtr;
+  struct CompositeHasComposite ChC;
+  struct CompositeHasComposite *ChCPtr;
+
+  int I;
+  float F;
+  int Array[5];
+
+  // Vars in a reduction must be a scalar or a composite of scalars.
+#pragma acc parallel reduction(&: CoS, I, F)
+  while (1);
+  // expected-error at +2{{OpenACC 'reduction' composite variable must not have non-scalar field}}
+  // expected-note@#COS_FIELD{{invalid field is here}}
+#pragma acc parallel reduction(&: ChC)
+  while (1);
+
+  // expected-error at +1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; type is 'int[5]'}}
+#pragma acc parallel reduction(&: Array)
+  while (1);
+
+#pragma acc parallel reduction(&: CoS, Array[I], Array[0:I])
+  while (1);
+
+  struct CompositeHasComposite ChCArray[5];
+  // expected-error at +1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; sub-array base type is 'struct CompositeHasComposite'}}
+#pragma acc parallel reduction(&: CoS, Array[I], ChCArray[0:I])
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: CoS.I)
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: CoSPtr->I)
+
+  while (1);
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: ChC.COS)
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: ChCPtr->COS)
+  while (1);
+
+#pragma acc parallel reduction(&: I) reduction(&:I)
+  while (1);
+
+  struct HasArray { int array[5]; } HA;
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&:HA.array[1:2])
+  while (1);
+}
diff --git a/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp
new file mode 100644
index 0000000000000..532dbb2387165
--- /dev/null
+++ b/clang/test/SemaOpenACC/compute-construct-reduction-clause.cpp
@@ -0,0 +1,175 @@
+// RUN: %clang_cc1 %s -fopenacc -verify
+
+struct CompositeOfScalars {
+  int I;
+  float F;
+  short J;
+  char C;
+  double D;
+  _Complex float CF;
+  _Complex double CD;
+};
+
+struct CompositeHasComposite {
+  int I;
+  float F;
+  short J;
+  char C;
+  double D;
+  _Complex float CF;
+  _Complex double CD;
+  struct CompositeOfScalars COS; // #COS_FIELD
+};
+
+void uses(unsigned Parm) {
+  float Var;
+  int IVar;
+
+#pragma acc parallel reduction(+:Parm)
+  while (1);
+#pragma acc serial reduction(+:Parm)
+  while (1);
+  // expected-error at +1{{OpenACC 'reduction' clause is not valid on 'kernels' directive}}
+#pragma acc kernels reduction(+:Parm)
+  while (1);
+
+  // On a 'parallel', 'num_gangs' cannot have >1 args. num_gangs not valid on
+  // 'serial', but 'reduction' not valid on 'kernels', other combos cannot be
+  // tested.
+#pragma acc parallel reduction(+:Parm) num_gangs(IVar)
+  while (1);
+#pragma acc parallel num_gangs(IVar) reduction(+:Var)
+  while (1);
+
+  // expected-error at +2{{OpenACC 'reduction' clause may not appear on a 'parallel' construct with a 'num_gangs' clause with more than 1 argument, have 2}}
+  // expected-note at +1{{previous clause is here}}
+#pragma acc parallel reduction(+:Parm) num_gangs(Parm, IVar)
+  while (1);
+
+  // expected-error at +2{{OpenACC 'reduction' clause may not appear on a 'parallel' construct with a 'num_gangs' clause with more than 1 argument, have 2}}
+  // expected-note at +1{{previous clause is here}}
+#pragma acc parallel num_gangs(Parm, IVar) reduction(+:Var)
+  while (1);
+
+#pragma acc parallel reduction(+:Parm) reduction(+:Parm)
+  while (1);
+
+  struct CompositeOfScalars CoS;
+  struct CompositeOfScalars *CoSPtr;
+  struct CompositeHasComposite ChC;
+  struct CompositeHasComposite *ChCPtr;
+
+  int I;
+  float F;
+  int Array[5];
+
+  // Vars in a reduction must be a scalar or a composite of scalars.
+#pragma acc parallel reduction(&: CoS, I, F)
+  while (1);
+  // expected-error at +2{{OpenACC 'reduction' composite variable must not have non-scalar field}}
+  // expected-note@#COS_FIELD{{invalid field is here}}
+#pragma acc parallel reduction(&: ChC)
+  while (1);
+  // expected-error at +1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; type is 'int[5]'}}
+#pragma acc parallel reduction(&: Array)
+  while (1);
+
+#pragma acc parallel reduction(&: CoS, Array[I], Array[0:I])
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: CoS.I)
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: CoSPtr->I)
+
+  while (1);
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: ChC.COS)
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: ChCPtr->COS)
+  while (1);
+}
+
+template<typename T, typename U, typename V>
+void TemplUses(T Parm, U CoS, V ChC) {
+  T Var;
+  U *CoSPtr;
+  V *ChCPtr;
+
+#pragma acc parallel reduction(+:Parm)
+  while (1);
+#pragma acc serial reduction(+:Parm)
+  while (1);
+  // expected-error at +1{{OpenACC 'reduction' clause is not valid on 'kernels' directive}}
+#pragma acc kernels reduction(+:Parm)
+  while (1);
+
+  // On a 'parallel', 'num_gangs' cannot have >1 args. num_gangs not valid on
+  // 'serial', but 'reduction' not valid on 'kernels', other combos cannot be
+  // tested.
+#pragma acc parallel reduction(+:Parm) num_gangs(Var)
+  while (1);
+#pragma acc parallel num_gangs(Var) reduction(+:Var)
+  while (1);
+
+  // expected-error at +2{{OpenACC 'reduction' clause may not appear on a 'parallel' construct with a 'num_gangs' clause with more than 1 argument, have 2}}
+  // expected-note at +1{{previous clause is here}}
+#pragma acc parallel reduction(+:Parm) num_gangs(Parm, Var)
+  while (1);
+
+  // expected-error at +2{{OpenACC 'reduction' clause may not appear on a 'parallel' construct with a 'num_gangs' clause with more than 1 argument, have 2}}
+  // expected-note at +1{{previous clause is here}}
+#pragma acc parallel num_gangs(Parm, Var) reduction(+:Var)
+  while (1);
+
+#pragma acc parallel reduction(+:Parm) reduction(+:Parm)
+  while (1);
+
+  int NonDep;
+  int NonDepArray[5];
+  T Array[5];
+
+  // Vars in a reduction must be a scalar or a composite of scalars.
+#pragma acc parallel reduction(&: CoS, Var, Parm)
+  while (1);
+  // expected-error at +2{{OpenACC 'reduction' composite variable must not have non-scalar field}}
+  // expected-note@#COS_FIELD{{invalid field is here}}
+#pragma acc parallel reduction(&: ChC)
+  while (1);
+  // expected-error at +1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; type is 'int[5]'}}
+#pragma acc parallel reduction(&: Array)
+  while (1);
+  // expected-error at +1{{OpenACC 'reduction' variable must be of scalar type, sub-array, or a composite of scalar types; type is 'int[5]'}}
+#pragma acc parallel reduction(&: NonDepArray)
+  while (1);
+
+#pragma acc parallel reduction(&: CoS, Array[Var], Array[0:Var])
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: CoS.I)
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: CoSPtr->I)
+
+  while (1);
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: ChC.COS)
+  while (1);
+
+  // expected-error at +1{{OpenACC variable is not a valid variable name, sub-array, array element, or composite variable member}}
+#pragma acc parallel reduction(&: ChCPtr->COS)
+  while (1);
+}
+
+void inst() {
+  CompositeOfScalars CoS;
+  CompositeHasComposite ChC;
+  // expected-note at +1{{in instantiation of function template specialization}}
+  TemplUses(5, CoS, ChC);
+}
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index bfbdb5be9ff2f..f00ba9e3acfc8 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2855,6 +2855,10 @@ void OpenACCClauseEnqueue::VisitWaitClause(const OpenACCWaitClause &C) {
 }
 void OpenACCClauseEnqueue::VisitDeviceTypeClause(
     const OpenACCDeviceTypeClause &C) {}
+void OpenACCClauseEnqueue::VisitReductionClause(
+    const OpenACCReductionClause &C) {
+  VisitVarList(C);
+}
 } // namespace
 
 void EnqueueVisitor::EnqueueChildren(const OpenACCClause *C) {

>From d0e0205bfc5a147f8744a176a10f185af7520c26 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 15:56:38 +0200
Subject: [PATCH 22/27] [InstCombine] Check for poison instead of undef in
 single shuffle fold

Otherwise we'll convert undef to poison. Alive2 was already flagging
the existing test8 test as a miscompile.
---
 .../InstCombine/InstCombineVectorOps.cpp        |  2 +-
 llvm/test/CodeGen/PowerPC/vec_shuffle.ll        | 10 +++++-----
 .../InstCombine/vec_shuffle-inseltpoison.ll     | 17 +++++++++++++++++
 llvm/test/Transforms/InstCombine/vec_shuffle.ll |  5 +++--
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 9dcd9ef07d74f..86e162e2f55d4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -619,7 +619,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
          "Invalid CollectSingleShuffleElements");
   unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
 
-  if (match(V, m_Undef())) {
+  if (match(V, m_Poison())) {
     Mask.assign(NumElts, -1);
     return true;
   }
diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle.ll
index e698ab1e15a91..22b5ff0d21cbd 100644
--- a/llvm/test/CodeGen/PowerPC/vec_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -32,7 +32,7 @@ entry:
 	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2		; <i8> [#uses=1]
 	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3		; <i8> [#uses=1]
 	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4		; <i8> [#uses=1]
-	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0		; <<16 x i8>> [#uses=1]
+	%tmp18 = insertelement <16 x i8> poison, i8 %tmp.upgrd.3, i32 0		; <<16 x i8>> [#uses=1]
 	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
 	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
 	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
@@ -80,7 +80,7 @@ define void @VSLDOI_xx(ptr %A, ptr %B) {
 	%tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2		; <i8> [#uses=1]
 	%tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3		; <i8> [#uses=1]
 	%tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4		; <i8> [#uses=1]
-	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0		; <<16 x i8>> [#uses=1]
+	%tmp18 = insertelement <16 x i8> poison, i8 %tmp.upgrd.7, i32 0		; <<16 x i8>> [#uses=1]
 	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
 	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
 	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
@@ -150,7 +150,7 @@ entry:
 	%tmp15 = extractelement <16 x i8> %tmp2, i32 14		; <i8> [#uses=1]
 	%tmp16 = extractelement <16 x i8> %tmp, i32 15		; <i8> [#uses=1]
 	%tmp17 = extractelement <16 x i8> %tmp2, i32 15		; <i8> [#uses=1]
-	%tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0		; <<16 x i8>> [#uses=1]
+	%tmp18 = insertelement <16 x i8> poison, i8 %tmp.upgrd.12, i32 0		; <<16 x i8>> [#uses=1]
 	%tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1		; <<16 x i8>> [#uses=1]
 	%tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2		; <<16 x i8>> [#uses=1]
 	%tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3		; <<16 x i8>> [#uses=1]
@@ -189,7 +189,7 @@ entry:
 	%tmp7 = extractelement <8 x i16> %tmp2, i32 6		; <i16> [#uses=1]
 	%tmp8 = extractelement <8 x i16> %tmp, i32 7		; <i16> [#uses=1]
 	%tmp9 = extractelement <8 x i16> %tmp2, i32 7		; <i16> [#uses=1]
-	%tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0		; <<8 x i16>> [#uses=1]
+	%tmp10 = insertelement <8 x i16> poison, i16 %tmp.upgrd.13, i32 0		; <<8 x i16>> [#uses=1]
 	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1		; <<8 x i16>> [#uses=1]
 	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2		; <<8 x i16>> [#uses=1]
 	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3		; <<8 x i16>> [#uses=1]
@@ -216,7 +216,7 @@ entry:
 	%tmp3 = extractelement <4 x i32> %tmp2, i32 2		; <i32> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
 	%tmp5 = extractelement <4 x i32> %tmp2, i32 3		; <i32> [#uses=1]
-	%tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0		; <<4 x i32>> [#uses=1]
+	%tmp6 = insertelement <4 x i32> poison, i32 %tmp.upgrd.14, i32 0		; <<4 x i32>> [#uses=1]
 	%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
 	%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2		; <<4 x i32>> [#uses=1]
 	%tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
index ef085d3e7b50b..a9cdc8bd20247 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
@@ -87,6 +87,23 @@ define <4 x float> @test8(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 1, i32 poison, i32 3, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[T134]]
+;
+  %t4 = extractelement <4 x float> %x, i32 1
+  %t2 = extractelement <4 x float> %x, i32 3
+  %t1 = extractelement <4 x float> %y, i32 0
+  %t128 = insertelement <4 x float> poison, float %t4, i32 0
+  %t130 = insertelement <4 x float> %t128, float poison, i32 1
+  %t132 = insertelement <4 x float> %t130, float %t2, i32 2
+  %t134 = insertelement <4 x float> %t132, float %t1, i32 3
+  ret <4 x float> %t134
+}
+
+; This shouldn't turn into a single shuffle
+define <4 x float> @test8_undef(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @test8_undef(
+; CHECK-NEXT:    [[T132:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> <float poison, float undef, float poison, float poison>, <4 x i32> <i32 1, i32 5, i32 3, i32 poison>
+; CHECK-NEXT:    [[T134:%.*]] = shufflevector <4 x float> [[T132]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT:    ret <4 x float> [[T134]]
 ;
   %t4 = extractelement <4 x float> %x, i32 1
   %t2 = extractelement <4 x float> %x, i32 3
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 4d7e9d9067e75..8c91efb473fae 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -88,10 +88,11 @@ define <4 x float> @test7(<4 x float> %x) {
   ret <4 x float> %r
 }
 
-; This should turn into a single shuffle.
+; This should not turn into a single shuffle.
 define <4 x float> @test8(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 1, i32 poison, i32 3, i32 4>
+; CHECK-NEXT:    [[T132:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> <float poison, float undef, float poison, float poison>, <4 x i32> <i32 1, i32 5, i32 3, i32 poison>
+; CHECK-NEXT:    [[T134:%.*]] = shufflevector <4 x float> [[T132]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ; CHECK-NEXT:    ret <4 x float> [[T134]]
 ;
   %t4 = extractelement <4 x float> %x, i32 1

>From 8d5b7d4d11ea47b9903b634f4159986422855383 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 16:07:46 +0200
Subject: [PATCH 23/27] [InstCombine] Use m_Poison() instead of m_Undef()
 (NFCI)

In this case, the isIdentityWithExtract() checks should already
guarantee that these are single-source shuffles, so this is just
for clarity.
---
 .../Transforms/InstCombine/InstCombineVectorOps.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 86e162e2f55d4..9a4230999e471 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2416,13 +2416,13 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
 }
 
 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
-/// narrowing (concatenating with undef and extracting back to the original
+/// narrowing (concatenating with poison and extracting back to the original
 /// length). This allows replacing the wide select with a narrow select.
 static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
                                        InstCombiner::BuilderTy &Builder) {
   // This must be a narrowing identity shuffle. It extracts the 1st N elements
   // of the 1st vector operand of a shuffle.
-  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
+  if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
     return nullptr;
 
   // The vector being shuffled must be a vector select that we can eliminate.
@@ -2432,19 +2432,20 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
              m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
     return nullptr;
 
-  // We need a narrow condition value. It must be extended with undef elements
+  // We need a narrow condition value. It must be extended with poison elements
   // and have the same number of elements as this shuffle.
   unsigned NarrowNumElts =
       cast<FixedVectorType>(Shuf.getType())->getNumElements();
   Value *NarrowCond;
-  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
+  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
       cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
           NarrowNumElts ||
       !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
     return nullptr;
 
-  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
-  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
+  // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
+  // -->
+  // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
   Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
   Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
   return SelectInst::Create(NarrowCond, NarrowX, NarrowY);

>From 7c640d1d43d7a151100e92c678757e0ce897bcc2 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Tue, 21 May 2024 17:12:15 +0300
Subject: [PATCH 24/27] [clangd] Fix directory separators in
 `infinite-instatiation.test`

Another take at fixing https://lab.llvm.org/buildbot/#/builders/123/builds/26920 and https://lab.llvm.org/buildbot/#/builders/123/builds/26913 caused by #92888
---
 clang-tools-extra/clangd/test/infinite-instantiation.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/test/infinite-instantiation.test b/clang-tools-extra/clangd/test/infinite-instantiation.test
index da1a294142429..d379a9c2d523e 100644
--- a/clang-tools-extra/clangd/test/infinite-instantiation.test
+++ b/clang-tools-extra/clangd/test/infinite-instantiation.test
@@ -1,5 +1,5 @@
 // RUN: rm -rf %t.dir && mkdir -p %t.dir
-// RUN: echo '[{"directory": "%t.dir", "command": "clang -ftemplate-depth=100 -x c++ %s", "file": "%s"}]' > %t.dir/compile_commands.json
+// RUN: echo '[{"directory": "%/t.dir", "command": "clang -ftemplate-depth=100 -x c++ %s", "file": "%/s"}]' > %t.dir/compile_commands.json
 // RUN: not clangd --compile-commands-dir=%t.dir -check=%s 2>&1 | FileCheck -strict-whitespace %s
 
 // CHECK: [template_recursion_depth_exceeded]

>From 530d4c9bf3d963f51375a1d7afb32f439d9c94a9 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 21 May 2024 16:14:57 +0200
Subject: [PATCH 25/27] [InstCombine] Use m_Poison() instead of m_Undef()
 (NFCI)

In this case the shuffle mask checks should already guarantee a
single-source shuffle, so this is just for clarity.
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 9a4230999e471..244f099f06542 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1319,7 +1319,7 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
 static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
   // Check if the vector operand of this insert is an identity shuffle.
   auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
-  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
+  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
       !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
     return nullptr;
 

>From 0651a99d57c2ef9e6a8e6bd9bb36e7a38be821fd Mon Sep 17 00:00:00 2001
From: "Mubashar.Ahmad at arm.com" <mubashar.ahmad at arm.com>
Date: Thu, 16 May 2024 12:28:34 +0000
Subject: [PATCH 26/27] [mlir][VectorOps] Add deinterleave operation to vector
 dialect

The deinterleave operation constructs two vectors from a single input
vector. Each new vector is the collection of even and odd elements
from the input, respectively. This is essentially the inverse of an
interleave operation.

Each output's size is half of the input vector's trailing dimension
for the n-D case and only dimension for 1-D cases. It is not possible
to conduct the operation on 0-D inputs or vectors where the size of
the (trailing) dimension is 1.

The operation supports scalable vectors.

Example:
```mlir
%0 = vector.deinterleave %a
           : vector<[4]xi32>     ; yields vector<[2]xi32>, vector<[2]xi32>
%1 = vector.deinterleave %b
           : vector<8xi8>        ; yields vector<4xi8>, vector<4xi8>
%2 = vector.deinterleave %c
           : vector<2x8xf32>     ; yields vector<2x4xf32>, vector<2x4xf32>
%3 = vector.deinterleave %d
           : vector<2x4x[6]xf64> ; yields vector<2x4x[3]xf64>, vector<2x4x[3]xf64>
```
---
 .../mlir/Dialect/Vector/IR/VectorOps.td       | 76 +++++++++++++++++++
 mlir/test/Dialect/Vector/invalid.mlir         | 56 ++++++++++++++
 mlir/test/Dialect/Vector/ops.mlir             | 42 ++++++++++
 3 files changed, 174 insertions(+)

diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 332b5ad08ced9..1e7e0a1715178 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -543,6 +543,82 @@ def Vector_InterleaveOp :
   }];
 }
 
+class ResultIsHalfSourceVectorType<string result> : TypesMatchWith<
+  "type of 'input' is double the width of results",
+  "input", result,
+  [{
+    [&]() -> ::mlir::VectorType {
+      auto vectorType = ::llvm::cast<mlir::VectorType>($_self);
+      ::mlir::VectorType::Builder builder(vectorType);
+      auto lastDim = vectorType.getRank() - 1;
+      auto newDimSize = vectorType.getDimSize(lastDim) / 2;;
+      if (newDimSize <= 0)
+         return vectorType; // (invalid input type)
+      return builder.setDim(lastDim, newDimSize);
+    }()
+  }]
+>;
+
+def Vector_DeinterleaveOp :
+  Vector_Op<"deinterleave", [Pure,
+    PredOpTrait<"trailing dimension of input vector must be an even number",
+    CPred<[{
+      [&](){
+        auto srcVec = getSourceVectorType();
+        return srcVec.getDimSize(srcVec.getRank() - 1) % 2 == 0;
+      }()
+    }]>>,
+    ResultIsHalfSourceVectorType<"res1">,
+    ResultIsHalfSourceVectorType<"res2">,
+    AllTypesMatch<["res1", "res2"]>
+    ]> {
+      let summary = "constructs two vectors by deinterleaving an input vector";
+      let description = [{
+        The deinterleave operation constructs two vectors from a single input
+        vector. Each new vector is the collection of even and odd elements
+        from the input, respectively. This is essentially the inverse of an
+        interleave operation.
+
+        Each output's size is half of the input vector's trailing dimension
+        for the n-D case and only dimension for 1-D cases. It is not possible
+        to conduct the operation on 0-D inputs or vectors where the size of
+        the (trailing) dimension is 1.
+
+        The operation supports scalable vectors.
+
+        Example:
+        ```mlir
+        %0 = vector.deinterleave %a
+                   : vector<[4]xi32>     ; yields vector<[2]xi32>, vector<[2]xi32>
+        %1 = vector.deinterleave %b
+                   : vector<8xi8>        ; yields vector<4xi8>, vector<4xi8>
+        %2 = vector.deinterleave %c
+                   : vector<2x8xf32>     ; yields vector<2x4xf32>, vector<2x4xf32>
+        %3 = vector.deinterleave %d
+                   : vector<2x4x[6]xf64> ; yields vector<2x4x[3]xf64>, vector<2x4x[3]xf64>
+        ```
+      }];
+
+      let arguments = (ins AnyVector:$input);
+      let results = (outs AnyVector:$res1, AnyVector:$res2);
+
+      let assemblyFormat = [{
+        $input attr-dict `:` type($input)
+      }];
+
+      let extraClassDeclaration = [{
+        VectorType getSourceVectorType() {
+          return ::llvm::cast<VectorType>(getInput().getType());
+        }
+        VectorType getResultOneVectorType() {
+          return ::llvm::cast<VectorType>(getRes1().getType());
+        }
+        VectorType getResultTwoVectorType() {
+          return ::llvm::cast<VectorType>(getRes2().getType());
+        }
+      }];
+    }
+
 def Vector_ExtractElementOp :
   Vector_Op<"extractelement", [Pure,
      TypesMatchWith<"result type matches element type of vector operand",
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index c9f7e9c6e2fb0..25cacc6fdf93d 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1798,3 +1798,59 @@ func.func @invalid_outerproduct1(%src : memref<?xf32>) {
   // expected-error @+1 {{'vector.outerproduct' op expected 1-d vector for operand #1}}
   %op = vector.outerproduct %0, %1 : vector<[4]x[4]xf32>, vector<[4]xf32>
 }
+
+// -----
+
+func.func @deinterleave_zero_dim_fail(%vec : vector<f32>) {
+  // expected-error @+1 {{'vector.deinterleave' 'input' must be vector of any type values, but got 'vector<f32>'}}
+  %0, %1 = vector.deinterleave %vec : vector<f32> 
+  return
+}
+
+// -----
+
+func.func @deinterleave_one_dim_fail(%vec : vector<1xf32>) {
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that trailing dimension of input vector must be an even number}}
+  %0, %1 = vector.deinterleave %vec : vector<1xf32>
+  return
+}
+
+// -----
+
+func.func @deinterleave_oversized_output_fail(%vec : vector<4xf32>) {
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  %0, %1 = "vector.deinterleave" (%vec) : (vector<4xf32>) -> (vector<8xf32>, vector<8xf32>)
+  return
+}
+
+// -----
+
+func.func @deinterleave_output_dim_size_mismatch(%vec : vector<4xf32>) {
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  %0, %1 = "vector.deinterleave" (%vec) : (vector<4xf32>) -> (vector<4xf32>, vector<2xf32>)
+  return
+}
+
+// -----
+
+func.func @deinterleave_n_dim_rank_fail(%vec : vector<2x3x4xf32>) {
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  %0, %1 = "vector.deinterleave" (%vec) : (vector<2x3x4xf32>) -> (vector<2x3x4xf32>, vector<2x3x2xf32>)
+  return
+}
+
+// -----
+
+func.func @deinterleave_scalable_dim_size_fail(%vec : vector<2x[4]xf32>) {
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  %0, %1 = "vector.deinterleave" (%vec) : (vector<2x[4]xf32>) -> (vector<2x[2]xf32>, vector<2x[1]xf32>)
+  return
+}
+
+// -----
+
+func.func @deinterleave_scalable_rank_fail(%vec : vector<2x[4]xf32>) {
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  %0, %1 = "vector.deinterleave" (%vec) : (vector<2x[4]xf32>) -> (vector<2x[2]xf32>, vector<[2]xf32>)
+  return
+}
\ No newline at end of file
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index 79a80be4f8b20..a6a992f23a4ba 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -1116,3 +1116,45 @@ func.func @interleave_2d_scalable(%a: vector<2x[2]xf64>, %b: vector<2x[2]xf64>)
   %0 = vector.interleave %a, %b : vector<2x[2]xf64>
   return %0 : vector<2x[4]xf64>
 }
+
+// CHECK-LABEL: @deinterleave_1d
+func.func @deinterleave_1d(%arg: vector<4xf32>) -> (vector<2xf32>, vector<2xf32>) {
+  // CHECK: vector.deinterleave %{{.*}} : vector<4xf32>
+  %0, %1 = vector.deinterleave %arg : vector<4xf32>
+  return %0, %1 : vector<2xf32>, vector<2xf32>
+}
+
+// CHECK-LABEL: @deinterleave_1d_scalable
+func.func @deinterleave_1d_scalable(%arg: vector<[4]xf32>) -> (vector<[2]xf32>, vector<[2]xf32>) {
+  // CHECK: vector.deinterleave %{{.*}} : vector<[4]xf32>
+  %0, %1 = vector.deinterleave %arg : vector<[4]xf32>
+  return %0, %1 : vector<[2]xf32>, vector<[2]xf32>
+}
+
+// CHECK-LABEL: @deinterleave_2d
+func.func @deinterleave_2d(%arg: vector<3x4xf32>) -> (vector<3x2xf32>, vector<3x2xf32>) {
+  // CHECK: vector.deinterleave %{{.*}} : vector<3x4xf32>
+  %0, %1 = vector.deinterleave %arg : vector<3x4xf32>
+  return %0, %1 : vector<3x2xf32>, vector<3x2xf32>
+}
+
+// CHECK-LABEL: @deinterleave_2d_scalable
+func.func @deinterleave_2d_scalable(%arg: vector<3x[4]xf32>) -> (vector<3x[2]xf32>, vector<3x[2]xf32>) {
+  // CHECK: vector.deinterleave %{{.*}} : vector<3x[4]xf32>
+  %0, %1 = vector.deinterleave %arg : vector<3x[4]xf32>
+  return %0, %1 : vector<3x[2]xf32>, vector<3x[2]xf32>
+}
+
+// CHECK-LABEL: @deinterleave_nd
+func.func @deinterleave_nd(%arg: vector<2x3x4x6xf32>) -> (vector<2x3x4x3xf32>, vector<2x3x4x3xf32>) {
+  // CHECK: vector.deinterleave %{{.*}} : vector<2x3x4x6xf32>
+  %0, %1 = vector.deinterleave %arg : vector<2x3x4x6xf32>
+  return %0, %1 : vector<2x3x4x3xf32>, vector<2x3x4x3xf32>
+}
+
+// CHECK-LABEL: @deinterleave_nd_scalable
+func.func @deinterleave_nd_scalable(%arg:vector<2x3x4x[6]xf32>) -> (vector<2x3x4x[3]xf32>, vector<2x3x4x[3]xf32>) {
+  // CHECK: vector.deinterleave %{{.*}} : vector<2x3x4x[6]xf32>
+  %0, %1 = vector.deinterleave %arg : vector<2x3x4x[6]xf32>
+  return %0, %1 : vector<2x3x4x[3]xf32>, vector<2x3x4x[3]xf32>
+}
\ No newline at end of file

>From b78f5a169b9ec42ac0a69601e81da0ca5710dea4 Mon Sep 17 00:00:00 2001
From: "Mubashar.Ahmad at arm.com" <mubashar.ahmad at arm.com>
Date: Thu, 16 May 2024 12:28:34 +0000
Subject: [PATCH 27/27] [mlir][VectorOps] Add deinterleave operation to vector
 dialect

The deinterleave operation constructs two vectors from a single input
vector. Each new vector is the collection of even and odd elements
from the input, respectively. This is essentially the inverse of an
interleave operation.

Each output's size is half of the input vector's trailing dimension
for the n-D case and only dimension for 1-D cases. It is not possible
to conduct the operation on 0-D inputs or vectors where the size of
the (trailing) dimension is 1.

The operation supports scalable vectors.

Example:
```mlir
%0 = vector.deinterleave %a
           : vector<[4]xi32> -> vector<[2]xi32>
%1 = vector.deinterleave %b
           : vector<8xi8> -> vector<4xi8>
%2 = vector.deinterleave %c
           : vector<2x8xf32> -> vector<2x4xf32>
%3 = vector.deinterleave %d
           : vector<2x4x[6]xf64> -> vector<2x4x[3]xf64>
```
---
 .../mlir/Dialect/Vector/IR/VectorOps.td       | 62 ++++++++++---------
 mlir/test/Dialect/Vector/invalid.mlir         | 20 +++---
 mlir/test/Dialect/Vector/ops.mlir             | 26 ++++----
 3 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 1e7e0a1715178..bfbb40405c3c1 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -544,8 +544,8 @@ def Vector_InterleaveOp :
 }
 
 class ResultIsHalfSourceVectorType<string result> : TypesMatchWith<
-  "type of 'input' is double the width of results",
-  "input", result,
+  "the trailing dimension of the results is half the width of source trailing dimension",
+  "source", result,
   [{
     [&]() -> ::mlir::VectorType {
       auto vectorType = ::llvm::cast<mlir::VectorType>($_self);
@@ -559,63 +559,67 @@ class ResultIsHalfSourceVectorType<string result> : TypesMatchWith<
   }]
 >;
 
-def Vector_DeinterleaveOp :
-  Vector_Op<"deinterleave", [Pure,
-    PredOpTrait<"trailing dimension of input vector must be an even number",
+def SourceVectorEvenElementCount : PredOpTrait<
+  "the trailing dimension of the source vector has an even number of elements",
     CPred<[{
       [&](){
         auto srcVec = getSourceVectorType();
         return srcVec.getDimSize(srcVec.getRank() - 1) % 2 == 0;
       }()
-    }]>>,
+    }]>
+>;
+
+def Vector_DeinterleaveOp :
+  Vector_Op<"deinterleave", [Pure,
+    SourceVectorEvenElementCount,
     ResultIsHalfSourceVectorType<"res1">,
-    ResultIsHalfSourceVectorType<"res2">,
     AllTypesMatch<["res1", "res2"]>
     ]> {
       let summary = "constructs two vectors by deinterleaving an input vector";
       let description = [{
         The deinterleave operation constructs two vectors from a single input
-        vector. Each new vector is the collection of even and odd elements
-        from the input, respectively. This is essentially the inverse of an
-        interleave operation.
+        vector. The first result vector contains the elements from even indexes
+        of the input, and the second contains elements from odd indexes. This is
+        the inverse of a `vector.interleave` operation.
 
-        Each output's size is half of the input vector's trailing dimension
-        for the n-D case and only dimension for 1-D cases. It is not possible
-        to conduct the operation on 0-D inputs or vectors where the size of
-        the (trailing) dimension is 1.
+        Each output's trailing dimension is half of the size of the input
+        vector's trailing dimension. This operation requires the input vector
+        to have a rank > 0 and an even number of elements in its trailing
+        dimension.
 
         The operation supports scalable vectors.
 
         Example:
         ```mlir
-        %0 = vector.deinterleave %a
-                   : vector<[4]xi32>     ; yields vector<[2]xi32>, vector<[2]xi32>
-        %1 = vector.deinterleave %b
-                   : vector<8xi8>        ; yields vector<4xi8>, vector<4xi8>
-        %2 = vector.deinterleave %c
-                   : vector<2x8xf32>     ; yields vector<2x4xf32>, vector<2x4xf32>
-        %3 = vector.deinterleave %d
-                   : vector<2x4x[6]xf64> ; yields vector<2x4x[3]xf64>, vector<2x4x[3]xf64>
+        %0, %1 = vector.deinterleave %a
+                   :vector<8xi8> -> vector<4xi8>
+        %2, %3 = vector.deinterleave %b
+                   : vector<2x8xi8> -> vector<2x4xi8>
+        %4, %5 = vector.deinterleave %b
+                   : vector<2x8x4xi8> -> vector<2x8x2xi8>
+        %6, %7 = vector.deinterleave %c
+                   : vector<[8]xf32> -> vector<[4]xf32>
+        %8, %9 = vector.deinterleave %d
+                   : vector<2x[6]xf64> -> vector<2x[3]xf64>
+        %10, %11 = vector.deinterleave %d
+                   : vector<2x4x[6]xf64> -> vector<2x4x[3]xf64>
         ```
       }];
 
-      let arguments = (ins AnyVector:$input);
+      let arguments = (ins AnyVector:$source);
       let results = (outs AnyVector:$res1, AnyVector:$res2);
 
       let assemblyFormat = [{
-        $input attr-dict `:` type($input)
+        $source attr-dict `:` type($source) `->` type($res1)
       }];
 
       let extraClassDeclaration = [{
         VectorType getSourceVectorType() {
-          return ::llvm::cast<VectorType>(getInput().getType());
+          return ::llvm::cast<VectorType>(getSource().getType());
         }
-        VectorType getResultOneVectorType() {
+        VectorType getResultVectorType() {
           return ::llvm::cast<VectorType>(getRes1().getType());
         }
-        VectorType getResultTwoVectorType() {
-          return ::llvm::cast<VectorType>(getRes2().getType());
-        }
       }];
     }
 
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index 25cacc6fdf93d..1516f51fe1458 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1802,23 +1802,23 @@ func.func @invalid_outerproduct1(%src : memref<?xf32>) {
 // -----
 
 func.func @deinterleave_zero_dim_fail(%vec : vector<f32>) {
-  // expected-error @+1 {{'vector.deinterleave' 'input' must be vector of any type values, but got 'vector<f32>'}}
-  %0, %1 = vector.deinterleave %vec : vector<f32> 
+  // expected-error @+1 {{'vector.deinterleave' op operand #0 must be vector of any type values, but got 'vector<f32>}}
+  %0, %1 = vector.deinterleave %vec : vector<f32> -> vector<f32>
   return
 }
 
 // -----
 
 func.func @deinterleave_one_dim_fail(%vec : vector<1xf32>) {
-  // expected-error @+1 {{'vector.deinterleave' op failed to verify that trailing dimension of input vector must be an even number}}
-  %0, %1 = vector.deinterleave %vec : vector<1xf32>
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that the trailing dimension of the source vector has an even number of elements}}
+  %0, %1 = vector.deinterleave %vec : vector<1xf32> -> vector<1xf32>
   return
 }
 
 // -----
 
 func.func @deinterleave_oversized_output_fail(%vec : vector<4xf32>) {
-  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that the trailing dimension of the results is half the width of source trailing dimension}}
   %0, %1 = "vector.deinterleave" (%vec) : (vector<4xf32>) -> (vector<8xf32>, vector<8xf32>)
   return
 }
@@ -1826,7 +1826,7 @@ func.func @deinterleave_oversized_output_fail(%vec : vector<4xf32>) {
 // -----
 
 func.func @deinterleave_output_dim_size_mismatch(%vec : vector<4xf32>) {
-  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that the trailing dimension of the results is half the width of source trailing dimension}}
   %0, %1 = "vector.deinterleave" (%vec) : (vector<4xf32>) -> (vector<4xf32>, vector<2xf32>)
   return
 }
@@ -1834,7 +1834,7 @@ func.func @deinterleave_output_dim_size_mismatch(%vec : vector<4xf32>) {
 // -----
 
 func.func @deinterleave_n_dim_rank_fail(%vec : vector<2x3x4xf32>) {
-  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that the trailing dimension of the results is half the width of source trailing dimension}}
   %0, %1 = "vector.deinterleave" (%vec) : (vector<2x3x4xf32>) -> (vector<2x3x4xf32>, vector<2x3x2xf32>)
   return
 }
@@ -1842,7 +1842,7 @@ func.func @deinterleave_n_dim_rank_fail(%vec : vector<2x3x4xf32>) {
 // -----
 
 func.func @deinterleave_scalable_dim_size_fail(%vec : vector<2x[4]xf32>) {
-  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that all of {res1, res2} have same type}}
   %0, %1 = "vector.deinterleave" (%vec) : (vector<2x[4]xf32>) -> (vector<2x[2]xf32>, vector<2x[1]xf32>)
   return
 }
@@ -1850,7 +1850,7 @@ func.func @deinterleave_scalable_dim_size_fail(%vec : vector<2x[4]xf32>) {
 // -----
 
 func.func @deinterleave_scalable_rank_fail(%vec : vector<2x[4]xf32>) {
-  // expected-error @+1 {{'vector.deinterleave' op failed to verify that type of 'input' is double the width of results}}
+  // expected-error @+1 {{'vector.deinterleave' op failed to verify that all of {res1, res2} have same type}}
   %0, %1 = "vector.deinterleave" (%vec) : (vector<2x[4]xf32>) -> (vector<2x[2]xf32>, vector<[2]xf32>)
   return
-}
\ No newline at end of file
+}
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index a6a992f23a4ba..9d8101d3eee97 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -1119,42 +1119,42 @@ func.func @interleave_2d_scalable(%a: vector<2x[2]xf64>, %b: vector<2x[2]xf64>)
 
 // CHECK-LABEL: @deinterleave_1d
 func.func @deinterleave_1d(%arg: vector<4xf32>) -> (vector<2xf32>, vector<2xf32>) {
-  // CHECK: vector.deinterleave %{{.*}} : vector<4xf32>
-  %0, %1 = vector.deinterleave %arg : vector<4xf32>
+  // CHECK: vector.deinterleave %{{.*}} : vector<4xf32> -> vector<2xf32>
+  %0, %1 = vector.deinterleave %arg : vector<4xf32> -> vector<2xf32>
   return %0, %1 : vector<2xf32>, vector<2xf32>
 }
 
 // CHECK-LABEL: @deinterleave_1d_scalable
 func.func @deinterleave_1d_scalable(%arg: vector<[4]xf32>) -> (vector<[2]xf32>, vector<[2]xf32>) {
-  // CHECK: vector.deinterleave %{{.*}} : vector<[4]xf32>
-  %0, %1 = vector.deinterleave %arg : vector<[4]xf32>
+  // CHECK: vector.deinterleave %{{.*}} : vector<[4]xf32> -> vector<[2]xf32>
+  %0, %1 = vector.deinterleave %arg : vector<[4]xf32> -> vector<[2]xf32>
   return %0, %1 : vector<[2]xf32>, vector<[2]xf32>
 }
 
 // CHECK-LABEL: @deinterleave_2d
 func.func @deinterleave_2d(%arg: vector<3x4xf32>) -> (vector<3x2xf32>, vector<3x2xf32>) {
-  // CHECK: vector.deinterleave %{{.*}} : vector<3x4xf32>
-  %0, %1 = vector.deinterleave %arg : vector<3x4xf32>
+  // CHECK: vector.deinterleave %{{.*}} : vector<3x4xf32> -> vector<3x2xf32>
+  %0, %1 = vector.deinterleave %arg : vector<3x4xf32> -> vector<3x2xf32>
   return %0, %1 : vector<3x2xf32>, vector<3x2xf32>
 }
 
 // CHECK-LABEL: @deinterleave_2d_scalable
 func.func @deinterleave_2d_scalable(%arg: vector<3x[4]xf32>) -> (vector<3x[2]xf32>, vector<3x[2]xf32>) {
-  // CHECK: vector.deinterleave %{{.*}} : vector<3x[4]xf32>
-  %0, %1 = vector.deinterleave %arg : vector<3x[4]xf32>
+  // CHECK: vector.deinterleave %{{.*}} : vector<3x[4]xf32> -> vector<3x[2]xf32>
+  %0, %1 = vector.deinterleave %arg : vector<3x[4]xf32> -> vector<3x[2]xf32>
   return %0, %1 : vector<3x[2]xf32>, vector<3x[2]xf32>
 }
 
 // CHECK-LABEL: @deinterleave_nd
 func.func @deinterleave_nd(%arg: vector<2x3x4x6xf32>) -> (vector<2x3x4x3xf32>, vector<2x3x4x3xf32>) {
-  // CHECK: vector.deinterleave %{{.*}} : vector<2x3x4x6xf32>
-  %0, %1 = vector.deinterleave %arg : vector<2x3x4x6xf32>
+  // CHECK: vector.deinterleave %{{.*}} : vector<2x3x4x6xf32> -> vector<2x3x4x3xf32>
+  %0, %1 = vector.deinterleave %arg : vector<2x3x4x6xf32> -> vector<2x3x4x3xf32>
   return %0, %1 : vector<2x3x4x3xf32>, vector<2x3x4x3xf32>
 }
 
 // CHECK-LABEL: @deinterleave_nd_scalable
 func.func @deinterleave_nd_scalable(%arg:vector<2x3x4x[6]xf32>) -> (vector<2x3x4x[3]xf32>, vector<2x3x4x[3]xf32>) {
-  // CHECK: vector.deinterleave %{{.*}} : vector<2x3x4x[6]xf32>
-  %0, %1 = vector.deinterleave %arg : vector<2x3x4x[6]xf32>
+  // CHECK: vector.deinterleave %{{.*}} : vector<2x3x4x[6]xf32> -> vector<2x3x4x[3]xf32>
+  %0, %1 = vector.deinterleave %arg : vector<2x3x4x[6]xf32> -> vector<2x3x4x[3]xf32>
   return %0, %1 : vector<2x3x4x[3]xf32>, vector<2x3x4x[3]xf32>
-}
\ No newline at end of file
+}