[Mlir-commits] [clang] [mlir] [Clang][Sema] Avoid duplicate diagnostics for incomplete types in nested name specifier (C++20+) (PR #147036)
llvmlistbot at llvm.org
Fri Jul 4 03:44:45 PDT 2025
https://github.com/zhy-tju created https://github.com/llvm/llvm-project/pull/147036
Linked issue: #147000
In C++20 and later, Clang currently emits duplicate diagnostics when it
encounters an incomplete type in a nested name specifier (e.g.,
`incomplete::type`), because multiple semantic analysis paths (such as
scope resolution and qualified-type building) trigger the same diagnostic.
This patch suppresses the duplicates by recording already-diagnosed
TagDecls in a DenseSet within Sema (`IncompleteDiagSet`). If a TagDecl has
already been diagnosed as incomplete in a nested name specifier, it is
skipped on subsequent checks.
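As a concrete illustration, here is a minimal reproducer sketch based on the regression test added in this PR (the before/after duplicate counts restate the PR's description rather than independently verified output):

```cpp
// repro.cpp -- check with: clang -std=c++20 -fsyntax-only repro.cpp
struct incomplete;    // forward declaration only, so 'incomplete' is an incomplete type

// Naming a member type of an incomplete class in a nested name specifier
// is ill-formed. Before this patch, C++20 mode could report the
// "incomplete type 'incomplete' named in nested name specifier" error
// more than once for this one declaration; with the patch it is reported
// once, with a note pointing at the forward declaration above.
incomplete::type var;
```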
From 540eba76e0397f68ea9148abd2810d1b235bc557 Mon Sep 17 00:00:00 2001
From: zhy <2697737506 at qq.com>
Date: Thu, 3 Jul 2025 17:12:04 +0800
Subject: [PATCH 1/6] [mlir][amdgpu][docs] Add op examples to dialect docs
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 99 +++++++++++++++++++
1 file changed, 99 insertions(+)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index eadb5d9326798..dede906dcec1d 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -106,6 +106,15 @@ def AMDGPU_ExtPackedFp8Op :
If the passed-in vector has fewer than four elements, or the input is scalar,
the remaining values in the <4 x i8> will be filled with
undefined values as needed.
+ #### Example
+ ```mlir
+ // Extract single FP8 element to scalar f32
+ %element = amdgpu.ext_packed_fp8 %src_vector[0] : vector<4xf8E4M3FNUZ> to f32
+
+ // Extract two FP8 elements to vector<2xf32>
+ %elements = amdgpu.ext_packed_fp8 %src_vector[0] : vector<4xf8E4M3FNUZ> to vector<2xf32>
+ ```
+
}];
let assemblyFormat = [{
attr-dict $source `[` $index `]` `:` type($source) `to` type($res)
@@ -162,6 +171,11 @@ def AMDGPU_PackedTrunc2xFp8Op :
sub-registers, and so the conversion intrinsics (which are currently the
only way to work with 8-bit float types) take packed vectors of 4 8-bit
values.
+ #### Example
+ ```mlir
+ %result = amdgpu.packed_trunc_2xfp8 %src1, %src2 into %dest[word 1]
+ : f32 to vector<4xf8E5M2FNUZ> into vector<4xf8E5M2FNUZ>
+ ```
}];
let assemblyFormat = [{
attr-dict $sourceA `,` ($sourceB^):(`undef`)?
@@ -220,6 +234,11 @@ def AMDGPU_PackedStochRoundFp8Op :
sub-registers, and so the conversion intrinsics (which are currently the
only way to work with 8-bit float types) take packed vectors of 4 8-bit
values.
+ #### Example
+ ```mlir
+ %result = amdgpu.packed_stoch_round_fp8 %src + %stoch_seed into %dest[2]
+ : f32 to vector<4xf8E5M2FNUZ> into vector<4xf8E5M2FNUZ>
+ ```
}];
let assemblyFormat = [{
attr-dict $source `+` $stochiasticParam
@@ -275,6 +294,18 @@ def AMDGPU_FatRawBufferCastOp :
If the value of the memref's offset is not uniform (independent of the lane/thread ID),
this will lead to substantially decreased performance due to the need for
a waterfall loop on the base address of the buffer resource.
+ #### Example
+ ```mlir
+ // Simple cast
+%converted = amdgpu.fat_raw_buffer_cast %src
+ : memref<8xi32> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
+
+// Cast with memory attributes
+%converted = amdgpu.fat_raw_buffer_cast %src validBytes(%valid)
+ cacheSwizzleStride(%swizzle) boundsCheck(false) resetOffset
+ : memref<8xi32, strided<[1], offset: ?>>
+ to memref<8xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>>
+ ```
}];
let extraClassDeclaration = [{
@@ -333,6 +364,18 @@ def AMDGPU_RawBufferLoadOp :
- If `boundsCheck` is false and the target chipset is RDNA, OOB_SELECT is set
to 2 to disable bounds checks, otherwise it is 3
- The cache coherency bits are off
+ #### Example
+ ```mlir
+ // Load scalar f32 from 1D buffer
+%scalar = amdgpu.raw_buffer_load %src[%idx] : memref<128xf32>, i32 -> f32
+
+// Load vector<4xf32> from 4D buffer
+%vector = amdgpu.raw_buffer_load %src[%idx0, %idx1, %idx2, %idx3]
+ : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32>
+
+// Load from scalar buffer
+%value = amdgpu.raw_buffer_load %src[] : memref<f32> -> f32
+ ```
}];
let assemblyFormat = [{
attr-dict $memref `[` $indices `]`
@@ -372,6 +415,18 @@ def AMDGPU_RawBufferStoreOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
+ #### Example
+ ```mlir
+ // Store scalar f32 to 1D buffer
+amdgpu.raw_buffer_store %value -> %dst[%idx] : f32 -> memref<128xf32>, i32
+
+// Store vector<4xf32> to 4D buffer
+amdgpu.raw_buffer_store %vec -> %dst[%idx0, %idx1, %idx2, %idx3]
+ : vector<4xf32> -> memref<128x64x32x16xf32>, i32, i32, i32, i32
+
+// Store to scalar buffer
+amdgpu.raw_buffer_store %value -> %dst[] : f32 -> memref<f32>
+ ```
}];
let assemblyFormat = [{
attr-dict $value `->` $memref `[` $indices `]`
@@ -453,6 +508,16 @@ def AMDGPU_RawBufferAtomicFaddOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
+ #### Example
+ ```mlir
+ // Atomic floating-point add
+amdgpu.raw_buffer_atomic_fadd %value -> %dst[%idx]
+ : f32 -> memref<128xf32>, i32
+
+// Atomic compare-swap
+amdgpu.raw_buffer_atomic_cmpswap %src, %cmp -> %dst[%idx]
+ : f32 -> memref<128xf32>, i32
+ ```
}];
let assemblyFormat = [{
attr-dict $value `->` $memref `[` $indices `]`
@@ -651,6 +716,10 @@ def AMDGPU_SwizzleBitModeOp : AMDGPU_Op<"swizzle_bitmode",
let results = (outs AnyIntegerOrFloatOr1DVector:$result);
let assemblyFormat = [{
$src $and_mask $or_mask $xor_mask attr-dict `:` type($result)
+ #### Example
+ ```mlir
+ %result = amdgpu.swizzle_bitmode %src 1 2 4 : f32
+ ```
}];
}
@@ -673,6 +742,10 @@ def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
(those which will implement this barrier by emitting inline assembly),
use of this operation will impede the usability of memory watches (including
breakpoints set on variables) when debugging.
+ #### Example
+ ```mlir
+ amdgpu.lds_barrier
+ ```
}];
let assemblyFormat = "attr-dict";
}
@@ -711,6 +784,14 @@ def AMDGPU_SchedBarrierOp :
`amdgpu.sched_barrier` serves as a barrier that could be
configured to restrict movements of instructions through it as
defined by sched_barrier_opts.
+ #### Example
+ ```mlir
+ // Barrier allowing no dependent instructions
+amdgpu.sched_barrier allow = <none>
+
+// Barrier allowing specific execution units
+amdgpu.sched_barrier allow = <valu|all_vmem>
+ ```
}];
let assemblyFormat = [{
`allow` `=` $opts attr-dict
@@ -810,6 +891,12 @@ def AMDGPU_MFMAOp :
The negateA, negateB, and negateC flags are only supported for double-precision
operations on gfx94x.
+ #### Example
+ ```mlir
+ %result = amdgpu.mfma %a * %b + %c
+ { abid = 1 : i32, cbsz = 1 : i32, k = 1 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 }
+ : f32, f32, vector<32xf32>
+ ```
}];
let assemblyFormat = [{
$sourceA `*` $sourceB `+` $destC
@@ -851,6 +938,11 @@ def AMDGPU_WMMAOp :
The `clamp` flag is used to saturate the output of type T to numeric_limits<T>::max()
in case of overflow.
+ #### Example
+ ```mlir
+ %result = amdgpu.wmma %a * %b + %c
+ : vector<16xf16>, vector<16xf16>, vector<8xf16>
+ ```
}];
let assemblyFormat = [{
$sourceA `*` $sourceB `+` $destC
@@ -973,6 +1065,13 @@ def AMDGPU_ScaledMFMAOp :
are omitted from this wrapper.
- The `negateA`, `negateB`, and `negateC` flags in `amdgpu.mfma` are only supported for
double-precision operations on gfx94x and so are not included here.
+ #### Example
+ ```mlir
+ %result = amdgpu.scaled_mfma
+ (%scale_a[0] * %vec_a) * (%scale_b[1] * %vec_b) + %accum
+ { k = 64 : i32, m = 32 : i32, n = 32 : i32 }
+ : f8E8M0FNU, vector<32xf6E2M3FN>, f8E8M0FNU, vector<32xf6E2M3FN>, vector<16xf32>
+ ```
}];
let assemblyFormat = [{
`(` $scalesA `[` $scalesIdxA `]` `*` $sourceA `)` `*` `(` $scalesB `[` $scalesIdxB `]` `*` $sourceB `)` `+` $destC
From 78cba79df37bc364d4d2165c0216a71dd9e42a52 Mon Sep 17 00:00:00 2001
From: zhy <2697737506 at qq.com>
Date: Thu, 3 Jul 2025 18:02:01 +0800
Subject: [PATCH 2/6] [mlir][amdgpu][docs] Add op examples to dialect docs
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 77 +++++++++----------
1 file changed, 37 insertions(+), 40 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index dede906dcec1d..49eee82b1471d 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -294,18 +294,18 @@ def AMDGPU_FatRawBufferCastOp :
If the value of the memref's offset is not uniform (independent of the lane/thread ID),
this will lead to substantially decreased performance due to the need for
a waterfall loop on the base address of the buffer resource.
- #### Example
- ```mlir
+
+ #### Example
+ ```mlir
// Simple cast
%converted = amdgpu.fat_raw_buffer_cast %src
: memref<8xi32> to memref<8xi32, #amdgpu.address_space<fat_raw_buffer>>
-
// Cast with memory attributes
%converted = amdgpu.fat_raw_buffer_cast %src validBytes(%valid)
cacheSwizzleStride(%swizzle) boundsCheck(false) resetOffset
: memref<8xi32, strided<[1], offset: ?>>
to memref<8xi32, strided<[1]>, #amdgpu.address_space<fat_raw_buffer>>
- ```
+ ```
}];
let extraClassDeclaration = [{
@@ -366,15 +366,13 @@ def AMDGPU_RawBufferLoadOp :
- The cache coherency bits are off
#### Example
```mlir
- // Load scalar f32 from 1D buffer
-%scalar = amdgpu.raw_buffer_load %src[%idx] : memref<128xf32>, i32 -> f32
-
-// Load vector<4xf32> from 4D buffer
-%vector = amdgpu.raw_buffer_load %src[%idx0, %idx1, %idx2, %idx3]
- : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32>
-
-// Load from scalar buffer
-%value = amdgpu.raw_buffer_load %src[] : memref<f32> -> f32
+ // Load scalar f32 from 1D buffer
+ %scalar = amdgpu.raw_buffer_load %src[%idx] : memref<128xf32>, i32 -> f32
+ // Load vector<4xf32> from 4D buffer
+ %vector = amdgpu.raw_buffer_load %src[%idx0, %idx1, %idx2, %idx3]
+ : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32>
+ // Load from scalar buffer
+ %value = amdgpu.raw_buffer_load %src[] : memref<f32> -> f32
```
}];
let assemblyFormat = [{
@@ -417,15 +415,13 @@ def AMDGPU_RawBufferStoreOp :
instruction is constructed.
#### Example
```mlir
- // Store scalar f32 to 1D buffer
-amdgpu.raw_buffer_store %value -> %dst[%idx] : f32 -> memref<128xf32>, i32
-
-// Store vector<4xf32> to 4D buffer
-amdgpu.raw_buffer_store %vec -> %dst[%idx0, %idx1, %idx2, %idx3]
- : vector<4xf32> -> memref<128x64x32x16xf32>, i32, i32, i32, i32
-
-// Store to scalar buffer
-amdgpu.raw_buffer_store %value -> %dst[] : f32 -> memref<f32>
+ // Store scalar f32 to 1D buffer
+ amdgpu.raw_buffer_store %value -> %dst[%idx] : f32 -> memref<128xf32>, i32
+ // Store vector<4xf32> to 4D buffer
+ amdgpu.raw_buffer_store %vec -> %dst[%idx0, %idx1, %idx2, %idx3]
+ : vector<4xf32> -> memref<128x64x32x16xf32>, i32, i32, i32, i32
+ // Store to scalar buffer
+ amdgpu.raw_buffer_store %value -> %dst[] : f32 -> memref<f32>
```
}];
let assemblyFormat = [{
@@ -469,6 +465,12 @@ def AMDGPU_RawBufferAtomicCmpswapOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
+ #### Example
+ ```mlir
+ // Atomic compare-swap
+ amdgpu.raw_buffer_atomic_cmpswap %src, %cmp -> %dst[%idx]
+ : f32 -> memref<128xf32>, i32
+ ```
}];
let assemblyFormat = [{
attr-dict $src `,` $cmp `->` $memref `[` $indices `]`
@@ -508,15 +510,11 @@ def AMDGPU_RawBufferAtomicFaddOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
- #### Example
+ #### Example
```mlir
- // Atomic floating-point add
-amdgpu.raw_buffer_atomic_fadd %value -> %dst[%idx]
- : f32 -> memref<128xf32>, i32
-
-// Atomic compare-swap
-amdgpu.raw_buffer_atomic_cmpswap %src, %cmp -> %dst[%idx]
- : f32 -> memref<128xf32>, i32
+ // Atomic floating-point add
+ amdgpu.raw_buffer_atomic_fadd %value -> %dst[%idx]
+ : f32 -> memref<128xf32>, i32
```
}];
let assemblyFormat = [{
@@ -712,15 +710,15 @@ def AMDGPU_SwizzleBitModeOp : AMDGPU_Op<"swizzle_bitmode",
Supports arbitrary int/float/vector types, which will be repacked to i32 and
one or more `rocdl.ds_swizzle` ops during lowering.
- }];
- let results = (outs AnyIntegerOrFloatOr1DVector:$result);
- let assemblyFormat = [{
- $src $and_mask $or_mask $xor_mask attr-dict `:` type($result)
- #### Example
+ #### Example
```mlir
%result = amdgpu.swizzle_bitmode %src 1 2 4 : f32
```
}];
+ let results = (outs AnyIntegerOrFloatOr1DVector:$result);
+ let assemblyFormat = [{
+ $src $and_mask $or_mask $xor_mask attr-dict `:` type($result)
+ }];
}
def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
@@ -786,11 +784,10 @@ def AMDGPU_SchedBarrierOp :
defined by sched_barrier_opts.
#### Example
```mlir
- // Barrier allowing no dependent instructions
-amdgpu.sched_barrier allow = <none>
-
-// Barrier allowing specific execution units
-amdgpu.sched_barrier allow = <valu|all_vmem>
+ // Barrier allowing no dependent instructions
+ amdgpu.sched_barrier allow = <none>
+ // Barrier allowing specific execution units
+ amdgpu.sched_barrier allow = <valu|all_vmem>
```
}];
let assemblyFormat = [{
From 364aa4fded63dfef194ccc65e75a0b543235f406 Mon Sep 17 00:00:00 2001
From: zhy <2697737506 at qq.com>
Date: Fri, 4 Jul 2025 00:27:23 +0800
Subject: [PATCH 3/6] Format examples: add blank line before headings
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 49eee82b1471d..8ac73322c5513 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -106,6 +106,7 @@ def AMDGPU_ExtPackedFp8Op :
If the passed-in vector has fewer than four elements, or the input is scalar,
the remaining values in the <4 x i8> will be filled with
undefined values as needed.
+
#### Example
```mlir
// Extract single FP8 element to scalar f32
@@ -171,6 +172,7 @@ def AMDGPU_PackedTrunc2xFp8Op :
sub-registers, and so the conversion intrinsics (which are currently the
only way to work with 8-bit float types) take packed vectors of 4 8-bit
values.
+
#### Example
```mlir
%result = amdgpu.packed_trunc_2xfp8 %src1, %src2 into %dest[word 1]
@@ -234,6 +236,7 @@ def AMDGPU_PackedStochRoundFp8Op :
sub-registers, and so the conversion intrinsics (which are currently the
only way to work with 8-bit float types) take packed vectors of 4 8-bit
values.
+
#### Example
```mlir
%result = amdgpu.packed_stoch_round_fp8 %src + %stoch_seed into %dest[2]
@@ -364,6 +367,7 @@ def AMDGPU_RawBufferLoadOp :
- If `boundsCheck` is false and the target chipset is RDNA, OOB_SELECT is set
to 2 to disable bounds checks, otherwise it is 3
- The cache coherency bits are off
+
#### Example
```mlir
// Load scalar f32 from 1D buffer
@@ -413,6 +417,7 @@ def AMDGPU_RawBufferStoreOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
+
#### Example
```mlir
// Store scalar f32 to 1D buffer
@@ -465,6 +470,7 @@ def AMDGPU_RawBufferAtomicCmpswapOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
+
#### Example
```mlir
// Atomic compare-swap
@@ -510,6 +516,7 @@ def AMDGPU_RawBufferAtomicFaddOp :
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
+
#### Example
```mlir
// Atomic floating-point add
@@ -710,6 +717,7 @@ def AMDGPU_SwizzleBitModeOp : AMDGPU_Op<"swizzle_bitmode",
Supports arbitrary int/float/vector types, which will be repacked to i32 and
one or more `rocdl.ds_swizzle` ops during lowering.
+
#### Example
```mlir
%result = amdgpu.swizzle_bitmode %src 1 2 4 : f32
@@ -740,6 +748,7 @@ def AMDGPU_LDSBarrierOp : AMDGPU_Op<"lds_barrier"> {
(those which will implement this barrier by emitting inline assembly),
use of this operation will impede the usability of memory watches (including
breakpoints set on variables) when debugging.
+
#### Example
```mlir
amdgpu.lds_barrier
@@ -782,6 +791,7 @@ def AMDGPU_SchedBarrierOp :
`amdgpu.sched_barrier` serves as a barrier that could be
configured to restrict movements of instructions through it as
defined by sched_barrier_opts.
+
#### Example
```mlir
// Barrier allowing no dependent instructions
@@ -888,6 +898,7 @@ def AMDGPU_MFMAOp :
The negateA, negateB, and negateC flags are only supported for double-precision
operations on gfx94x.
+
#### Example
```mlir
%result = amdgpu.mfma %a * %b + %c
@@ -935,6 +946,7 @@ def AMDGPU_WMMAOp :
The `clamp` flag is used to saturate the output of type T to numeric_limits<T>::max()
in case of overflow.
+
#### Example
```mlir
%result = amdgpu.wmma %a * %b + %c
@@ -1062,6 +1074,7 @@ def AMDGPU_ScaledMFMAOp :
are omitted from this wrapper.
- The `negateA`, `negateB`, and `negateC` flags in `amdgpu.mfma` are only supported for
double-precision operations on gfx94x and so are not included here.
+
#### Example
```mlir
%result = amdgpu.scaled_mfma
From bf343cc9973bba1612f685adcbe80c4e139ef3e1 Mon Sep 17 00:00:00 2001
From: zhy <2697737506 at qq.com>
Date: Fri, 4 Jul 2025 16:50:03 +0800
Subject: [PATCH 4/6] [Clang] Duplicate diagnostics in C++20+ mode
---
clang/include/clang/Sema/DeclSpec.h | 10 ++++++++++
clang/lib/Sema/SemaCXXScopeSpec.cpp | 10 ++++++++++
2 files changed, 20 insertions(+)
diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h
index 6c4a32c4ac2f0..19ce8e3be21f6 100644
--- a/clang/include/clang/Sema/DeclSpec.h
+++ b/clang/include/clang/Sema/DeclSpec.h
@@ -76,6 +76,10 @@ class CXXScopeSpec {
NestedNameSpecifierLocBuilder Builder;
ArrayRef<TemplateParameterList *> TemplateParamLists;
+ /// Flag indicating whether an incomplete-type diagnostic
+ /// has already been emitted for this scope specifier.
+ bool HadIncompleteTypeError = false;
+
public:
SourceRange getRange() const { return Range; }
void setRange(SourceRange R) { Range = R; }
@@ -83,6 +87,12 @@ class CXXScopeSpec {
void setEndLoc(SourceLocation Loc) { Range.setEnd(Loc); }
SourceLocation getBeginLoc() const { return Range.getBegin(); }
SourceLocation getEndLoc() const { return Range.getEnd(); }
+
+ /// Return true if an incomplete-type diagnostic has already been emitted.
+ bool hasIncompleteTypeError() const { return HadIncompleteTypeError; }
+
+ /// Mark that an incomplete-type error was emitted for this scope.
+ void setIncompleteTypeError(bool v = true) { HadIncompleteTypeError = v; }
void setTemplateParamLists(ArrayRef<TemplateParameterList *> L) {
TemplateParamLists = L;
diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp
index ab83f625d2849..a95d2e83768e2 100644
--- a/clang/lib/Sema/SemaCXXScopeSpec.cpp
+++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp
@@ -209,10 +209,20 @@ bool Sema::RequireCompleteDeclContext(CXXScopeSpec &SS,
SourceLocation loc = SS.getLastQualifierNameLoc();
if (loc.isInvalid()) loc = SS.getRange().getBegin();
+ // If an incomplete-type error has already been emitted for this scope,
+ // suppress duplicate diagnostics to avoid noisy repeated messages.
+ if (SS.hasIncompleteTypeError())
+ return true;
+
// The type must be complete.
if (RequireCompleteType(loc, type, diag::err_incomplete_nested_name_spec,
SS.getRange())) {
SS.SetInvalid(SS.getRange());
+
+ // Remember that we've already diagnosed this incomplete type,
+ // so later checks won't emit redundant diagnostics.
+ SS.setIncompleteTypeError();
+
return true;
}
From c4cdce8856bc9afcddc354f0218cfe16b81fa27a Mon Sep 17 00:00:00 2001
From: zhy <2697737506 at qq.com>
Date: Fri, 4 Jul 2025 17:19:50 +0800
Subject: [PATCH 5/6] [Clang] Duplicate diagnostics in C++20+ mode
---
clang/test/SemaCXX/nested-name-spec.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/clang/test/SemaCXX/nested-name-spec.cpp b/clang/test/SemaCXX/nested-name-spec.cpp
index abeaba9d8dde2..df82d7a8dcf70 100644
--- a/clang/test/SemaCXX/nested-name-spec.cpp
+++ b/clang/test/SemaCXX/nested-name-spec.cpp
@@ -1,3 +1,10 @@
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s
+
+struct incomplete;
+incomplete::type var; // expected-error{{incomplete type 'incomplete' named in nested name specifier}}
+// expected-note@-2{{forward declaration of 'incomplete'}}
+
+
// RUN: %clang_cc1 -fsyntax-only -std=c++98 -verify -fblocks %s
namespace A {
struct C {
From ed0632e9413064fe135ed906833a8fbc39d15eb9 Mon Sep 17 00:00:00 2001
From: zhy <2697737506 at qq.com>
Date: Fri, 4 Jul 2025 18:27:28 +0800
Subject: [PATCH 6/6] [Clang] Duplicate diagnostics
---
clang/include/clang/Sema/DeclSpec.h | 10 ----------
clang/include/clang/Sema/Sema.h | 6 ++++++
clang/lib/Sema/SemaCXXScopeSpec.cpp | 17 ++++++++---------
3 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h
index 19ce8e3be21f6..6c4a32c4ac2f0 100644
--- a/clang/include/clang/Sema/DeclSpec.h
+++ b/clang/include/clang/Sema/DeclSpec.h
@@ -76,10 +76,6 @@ class CXXScopeSpec {
NestedNameSpecifierLocBuilder Builder;
ArrayRef<TemplateParameterList *> TemplateParamLists;
- /// Flag indicating whether an incomplete-type diagnostic
- /// has already been emitted for this scope specifier.
- bool HadIncompleteTypeError = false;
-
public:
SourceRange getRange() const { return Range; }
void setRange(SourceRange R) { Range = R; }
@@ -87,12 +83,6 @@ class CXXScopeSpec {
void setEndLoc(SourceLocation Loc) { Range.setEnd(Loc); }
SourceLocation getBeginLoc() const { return Range.getBegin(); }
SourceLocation getEndLoc() const { return Range.getEnd(); }
-
- /// Return true if an incomplete-type diagnostic has already been emitted.
- bool hasIncompleteTypeError() const { return HadIncompleteTypeError; }
-
- /// Mark that an incomplete-type error was emitted for this scope.
- void setIncompleteTypeError(bool v = true) { HadIncompleteTypeError = v; }
void setTemplateParamLists(ArrayRef<TemplateParameterList *> L) {
TemplateParamLists = L;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 3fe26f950ad51..1c7a67d32cf72 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -1555,6 +1555,12 @@ class Sema final : public SemaBase {
Sema(const Sema &) = delete;
void operator=(const Sema &) = delete;
+ /// Used to suppress duplicate diagnostics for incomplete types
+ /// in nested name specifiers (e.g. `incomplete::type`).
+ /// Without this, Clang may emit the same error multiple times
+ /// in C++20 or later, due to multiple semantic passes over the scope.
+ llvm::DenseSet<const TagDecl *> IncompleteDiagSet;
+
/// The handler for the FileChanged preprocessor events.
///
/// Used for diagnostics that implement custom semantic analysis for #include
diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp
index a95d2e83768e2..8731f3cbbb8cd 100644
--- a/clang/lib/Sema/SemaCXXScopeSpec.cpp
+++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp
@@ -206,22 +206,21 @@ bool Sema::RequireCompleteDeclContext(CXXScopeSpec &SS,
if (tag->isBeingDefined())
return false;
+ // Avoid emitting duplicate diagnostics for the same tag.
+ // This happens in C++20+ due to more aggressive semantic analysis.
+ if (IncompleteDiagSet.contains(tag))
+ return true;
+
SourceLocation loc = SS.getLastQualifierNameLoc();
if (loc.isInvalid()) loc = SS.getRange().getBegin();
- // If an incomplete-type error has already been emitted for this scope,
- // suppress duplicate diagnostics to avoid noisy repeated messages.
- if (SS.hasIncompleteTypeError())
- return true;
-
// The type must be complete.
if (RequireCompleteType(loc, type, diag::err_incomplete_nested_name_spec,
SS.getRange())) {
- SS.SetInvalid(SS.getRange());
+ // mark as diagnosed
+ IncompleteDiagSet.insert(tag);
- // Remember that we've already diagnosed this incomplete type,
- // so later checks won't emit redundant diagnostics.
- SS.setIncompleteTypeError();
+ SS.SetInvalid(SS.getRange());
return true;
}