================ @@ -685,17 +685,22 @@ template <typename Ty> class StaticLoopChunker { Ty KernelIteration = NumBlocks * BlockChunk; ---------------- DominikAdamski wrote: The current implementation matches Clang's chunking scheme. https://github.com/llvm/llvm-project/pull/81618