[Mlir-commits] [mlir] [mlir][LLVMIR][OpenMP] fix dominance for reduction init block (PR #96052)
Tom Eccles
llvmlistbot at llvm.org
Wed Jun 19 06:14:02 PDT 2024
https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/96052
>From 51f371377ef0a257b36bd54fdfe340d9ad33968e Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 13 Jun 2024 14:11:13 +0000
Subject: [PATCH 1/3] [mlir][LLVMIR][OpenMP] fix dominance for reduction init
block
It was incorrect to set the insertion point to the init block after
inlining the initialization region because the code generated in the
init block depends upon the value yielded from the init region. When
there were multiple reduction initialization regions each with multiple
blocks, this could lead to the initialization region being inlined after
the init block which depends upon it.
Moving the insertion point to before inlining the initialization block
turned up further issues around the handling of the terminator for the
initialization block, which are also fixed here.
This fixes a bug in #92430 (but the affected code couldn't compile
before #92430 anyway).
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 21 +-
.../openmp-parallel-reduction-multiblock.mlir | 342 ++++++++++++++++++
.../LLVMIR/openmp-reduction-init-arg.mlir | 2 +-
3 files changed, 361 insertions(+), 4 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cbfc64972f38b..9fe63a9655be2 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -388,8 +388,18 @@ static LogicalResult inlineConvertOmpRegions(
// be processed multiple times.
moduleTranslation.forgetMapping(region);
- if (potentialTerminator && potentialTerminator->isTerminator())
- potentialTerminator->insertAfter(&builder.GetInsertBlock()->back());
+ if (potentialTerminator && potentialTerminator->isTerminator()) {
+ llvm::BasicBlock *block = builder.GetInsertBlock();
+ if (block->empty())
+ // this can happen for really simple reduction init regions e.g.
+ // %0 = llvm.mlir.constant(0 : i32) : i32
+ // omp.yield(%0 : i32)
+ // because the llvm.mlir.constant (MLIR op) isn't converted into any
+ // llvm op
+ potentialTerminator->insertInto(block, block->begin());
+ else
+ potentialTerminator->insertAfter(&block->back());
+ }
return success();
}
@@ -1171,6 +1181,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
}
}
+ builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
+
for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;
@@ -1183,7 +1195,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
assert(phis.size() == 1 &&
"expected one value to be yielded from the "
"reduction neutral element declaration region");
- builder.SetInsertPoint(initBlock->getTerminator());
+
+ // mapInitializationArg finishes its block with a terminator. We need to
+ // insert before that terminator.
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
if (isByRef[i]) {
// Store the result of the inlined region to the allocated reduction var
diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
new file mode 100644
index 0000000000000..00020bd4c9d1e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
@@ -0,0 +1,342 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// generated by flang-new:
+// subroutine missordered_blocks(x,y)
+// integer, allocatable :: x, y
+// !$omp parallel reduction(+:x,y)
+// x = 42
+// y = 24
+// !$omp end parallel
+// end subroutine
+
+// This is basically a test that we don't crash while translating this IR
+
+omp.declare_reduction @add_reduction_byref_box_heap_i32 : !llvm.ptr init {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i32) : i32
+ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i32) : i32
+ %5 = llvm.alloca %4 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %6 = llvm.mlir.constant(0 : i64) : i64
+ %7 = llvm.mlir.constant(0 : i32) : i32
+ %8 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %8, %5 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %9 = llvm.mlir.constant(1 : i64) : i64
+ %10 = llvm.alloca %9 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i64) -> !llvm.ptr
+ %11 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %12 = llvm.load %11 : !llvm.ptr -> !llvm.ptr
+ %13 = llvm.ptrtoint %12 : !llvm.ptr to i64
+ %14 = llvm.icmp "eq" %13, %6 : i64
+ llvm.cond_br %14, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+ %15 = llvm.mlir.constant(9 : i32) : i32
+ %16 = llvm.mlir.zero : !llvm.ptr
+ %17 = llvm.getelementptr %16[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ %18 = llvm.ptrtoint %17 : !llvm.ptr to i64
+ %19 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %20 = llvm.insertvalue %18, %19[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %21 = llvm.mlir.constant(20180515 : i32) : i32
+ %22 = llvm.insertvalue %21, %20[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %23 = llvm.mlir.constant(0 : i32) : i32
+ %24 = llvm.trunc %23 : i32 to i8
+ %25 = llvm.insertvalue %24, %22[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %26 = llvm.trunc %15 : i32 to i8
+ %27 = llvm.insertvalue %26, %25[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %28 = llvm.mlir.constant(2 : i32) : i32
+ %29 = llvm.trunc %28 : i32 to i8
+ %30 = llvm.insertvalue %29, %27[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %31 = llvm.mlir.constant(0 : i32) : i32
+ %32 = llvm.trunc %31 : i32 to i8
+ %33 = llvm.insertvalue %32, %30[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %34 = llvm.insertvalue %12, %33[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %34, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %35 = llvm.load %3 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %35, %10 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ llvm.br ^bb3
+^bb2: // pred: ^bb0
+ %36 = llvm.mlir.zero : !llvm.ptr
+ %37 = llvm.getelementptr %36[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ %38 = llvm.ptrtoint %37 : !llvm.ptr to i64
+ //%39 = llvm.call @malloc(%38) {in_type = i32, operandSegmentSizes = array<i32: 0, 0>} : (i64) -> !llvm.ptr
+ %39 = llvm.mlir.zero : !llvm.ptr
+ llvm.store %7, %39 : i32, !llvm.ptr
+ %40 = llvm.mlir.constant(9 : i32) : i32
+ %41 = llvm.mlir.zero : !llvm.ptr
+ %42 = llvm.getelementptr %41[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ %43 = llvm.ptrtoint %42 : !llvm.ptr to i64
+ %44 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %45 = llvm.insertvalue %43, %44[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %46 = llvm.mlir.constant(20180515 : i32) : i32
+ %47 = llvm.insertvalue %46, %45[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %48 = llvm.mlir.constant(0 : i32) : i32
+ %49 = llvm.trunc %48 : i32 to i8
+ %50 = llvm.insertvalue %49, %47[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %51 = llvm.trunc %40 : i32 to i8
+ %52 = llvm.insertvalue %51, %50[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %53 = llvm.mlir.constant(2 : i32) : i32
+ %54 = llvm.trunc %53 : i32 to i8
+ %55 = llvm.insertvalue %54, %52[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %56 = llvm.mlir.constant(0 : i32) : i32
+ %57 = llvm.trunc %56 : i32 to i8
+ %58 = llvm.insertvalue %57, %55[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %59 = llvm.insertvalue %39, %58[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %59, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %60 = llvm.load %1 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %60, %10 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ llvm.br ^bb3
+^bb3: // 2 preds: ^bb1, ^bb2
+ omp.yield(%10 : !llvm.ptr)
+} combiner {
+^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i32) : i32
+ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %4, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %5 = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %5, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %6 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %7 = llvm.load %6 : !llvm.ptr -> !llvm.ptr
+ %8 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr
+ %10 = llvm.load %7 : !llvm.ptr -> i32
+ %11 = llvm.load %9 : !llvm.ptr -> i32
+ %12 = llvm.add %10, %11 : i32
+ llvm.store %12, %7 : i32, !llvm.ptr
+ omp.yield(%arg0 : !llvm.ptr)
+} cleanup {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %2 = llvm.mlir.constant(0 : i64) : i64
+ %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %3, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %4 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %5 = llvm.load %4 : !llvm.ptr -> !llvm.ptr
+ %6 = llvm.ptrtoint %5 : !llvm.ptr to i64
+ %7 = llvm.icmp "ne" %6, %2 : i64
+ llvm.cond_br %7, ^bb1, ^bb2
+^bb1: // pred: ^bb0
+ //llvm.call @free(%5) : (!llvm.ptr) -> ()
+ llvm.br ^bb2
+^bb2: // 2 preds: ^bb0, ^bb1
+ omp.yield
+}
+llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPmissordered_blocks", frame_pointer = #llvm.framePointerKind<"non-leaf">, target_cpu = "generic", target_features = #llvm.target_features<["+outline-atomics", "+v8a", "+fp-armv8", "+neon"]>} {
+ %0 = llvm.mlir.constant(24 : i32) : i32
+ %1 = llvm.mlir.constant(42 : i32) : i32
+ omp.parallel reduction(byref @add_reduction_byref_box_heap_i32 %arg0 -> %arg2 : !llvm.ptr, byref @add_reduction_byref_box_heap_i32 %arg1 -> %arg3 : !llvm.ptr) {
+ %2 = llvm.mlir.constant(1 : i32) : i32
+ %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i32) : i32
+ %5 = llvm.alloca %4 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %6 = llvm.load %arg2 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %6, %5 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %7 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %8 = llvm.load %7 : !llvm.ptr -> !llvm.ptr
+ llvm.store %1, %8 : i32, !llvm.ptr
+ %9 = llvm.load %arg3 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %9, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %10 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr
+ llvm.store %0, %11 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+
+// CHECK: %[[VAL_0:.*]] = alloca { ptr, ptr }, align 8
+// CHECK: br label %[[VAL_1:.*]]
+// CHECK: entry: ; preds = %[[VAL_2:.*]]
+// CHECK: %[[VAL_3:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: br label %[[VAL_4:.*]]
+// CHECK: omp_parallel: ; preds = %[[VAL_1]]
+// CHECK: %[[VAL_5:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_0]], i32 0, i32 0
+// CHECK: store ptr %[[VAL_6:.*]], ptr %[[VAL_5]], align 8
+// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_0]], i32 0, i32 1
+// CHECK: store ptr %[[VAL_8:.*]], ptr %[[VAL_7]], align 8
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @1, i32 1, ptr @missordered_blocks_..omp_par, ptr %[[VAL_0]])
+// CHECK: br label %[[VAL_9:.*]]
+// CHECK: omp.par.outlined.exit: ; preds = %[[VAL_4]]
+// CHECK: br label %[[VAL_10:.*]]
+// CHECK: omp.par.exit.split: ; preds = %[[VAL_9]]
+// CHECK: ret void
+// CHECK: omp.par.entry:
+// CHECK: %[[VAL_11:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_12:.*]], i32 0, i32 0
+// CHECK: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8
+// CHECK: %[[VAL_14:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_12]], i32 0, i32 1
+// CHECK: %[[VAL_15:.*]] = load ptr, ptr %[[VAL_14]], align 8
+// CHECK: %[[VAL_16:.*]] = alloca i32, align 4
+// CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4
+// CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4
+// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4
+// CHECK: %[[VAL_20:.*]] = alloca ptr, align 8
+// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
+// CHECK: %[[VAL_22:.*]] = alloca [2 x ptr], align 8
+// CHECK: br label %[[VAL_23:.*]]
+// CHECK: omp.reduction.init: ; preds = %[[VAL_24:.*]]
+// CHECK: br label %[[VAL_25:.*]]
+// CHECK: omp.reduction.neutral: ; preds = %[[VAL_23]]
+// CHECK: %[[VAL_26:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_27:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_28:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_29:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_13]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_29]], ptr %[[VAL_28]], align 8
+// CHECK: %[[VAL_30:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
+// CHECK: %[[VAL_31:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_28]], i32 0, i32 0
+// CHECK: %[[VAL_32:.*]] = load ptr, ptr %[[VAL_31]], align 8
+// CHECK: %[[VAL_33:.*]] = ptrtoint ptr %[[VAL_32]] to i64
+// CHECK: %[[VAL_34:.*]] = icmp eq i64 %[[VAL_33]], 0
+// CHECK: br i1 %[[VAL_34]], label %[[VAL_35:.*]], label %[[VAL_36:.*]]
+// CHECK: omp.reduction.neutral2: ; preds = %[[VAL_25]]
+// CHECK: store i32 0, ptr null, align 4
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 2, i8 0 }, ptr %[[VAL_26]], align 8
+// CHECK: %[[VAL_37:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_26]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_37]], ptr %[[VAL_30]], align 8
+// CHECK: br label %[[VAL_38:.*]]
+// CHECK: omp.reduction.neutral3: ; preds = %[[VAL_35]], %[[VAL_36]]
+// CHECK: br label %[[VAL_39:.*]]
+// CHECK: omp.region.cont: ; preds = %[[VAL_38]]
+// CHECK: %[[VAL_40:.*]] = phi ptr [ %[[VAL_30]], %[[VAL_38]] ]
+// CHECK: store ptr %[[VAL_40]], ptr %[[VAL_20]], align 8
+// CHECK: br label %[[VAL_41:.*]]
+// CHECK: omp.reduction.neutral5: ; preds = %[[VAL_39]]
+// CHECK: %[[VAL_42:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_43:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_44:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_45:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_15]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_45]], ptr %[[VAL_44]], align 8
+// CHECK: %[[VAL_46:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
+// CHECK: %[[VAL_47:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_44]], i32 0, i32 0
+// CHECK: %[[VAL_48:.*]] = load ptr, ptr %[[VAL_47]], align 8
+// CHECK: %[[VAL_49:.*]] = ptrtoint ptr %[[VAL_48]] to i64
+// CHECK: %[[VAL_50:.*]] = icmp eq i64 %[[VAL_49]], 0
+// CHECK: br i1 %[[VAL_50]], label %[[VAL_51:.*]], label %[[VAL_52:.*]]
+// CHECK: omp.reduction.neutral7: ; preds = %[[VAL_41]]
+// CHECK: store i32 0, ptr null, align 4
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 2, i8 0 }, ptr %[[VAL_42]], align 8
+// CHECK: %[[VAL_53:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_42]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_53]], ptr %[[VAL_46]], align 8
+// CHECK: br label %[[VAL_54:.*]]
+// CHECK: omp.reduction.neutral8: ; preds = %[[VAL_51]], %[[VAL_52]]
+// CHECK: br label %[[VAL_55:.*]]
+// CHECK: omp.region.cont4: ; preds = %[[VAL_54]]
+// CHECK: %[[VAL_56:.*]] = phi ptr [ %[[VAL_46]], %[[VAL_54]] ]
+// CHECK: store ptr %[[VAL_56]], ptr %[[VAL_21]], align 8
+// CHECK: br label %[[VAL_57:.*]]
+// CHECK: omp.par.region: ; preds = %[[VAL_55]]
+// CHECK: br label %[[VAL_58:.*]]
+// CHECK: omp.par.region10: ; preds = %[[VAL_57]]
+// CHECK: %[[VAL_59:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_60:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_61:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_40]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_61]], ptr %[[VAL_60]], align 8
+// CHECK: %[[VAL_62:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_60]], i32 0, i32 0
+// CHECK: %[[VAL_63:.*]] = load ptr, ptr %[[VAL_62]], align 8
+// CHECK: store i32 42, ptr %[[VAL_63]], align 4
+// CHECK: %[[VAL_64:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_56]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_64]], ptr %[[VAL_59]], align 8
+// CHECK: %[[VAL_65:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_59]], i32 0, i32 0
+// CHECK: %[[VAL_66:.*]] = load ptr, ptr %[[VAL_65]], align 8
+// CHECK: store i32 24, ptr %[[VAL_66]], align 4
+// CHECK: br label %[[VAL_67:.*]]
+// CHECK: omp.region.cont9: ; preds = %[[VAL_58]]
+// CHECK: %[[VAL_68:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_22]], i64 0, i64 0
+// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_68]], align 8
+// CHECK: %[[VAL_69:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_22]], i64 0, i64 1
+// CHECK: store ptr %[[VAL_21]], ptr %[[VAL_69]], align 8
+// CHECK: %[[VAL_70:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK: %[[VAL_71:.*]] = call i32 @__kmpc_reduce(ptr @1, i32 %[[VAL_70]], i32 2, i64 16, ptr %[[VAL_22]], ptr @.omp.reduction.func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK: switch i32 %[[VAL_71]], label %[[VAL_72:.*]] [
+// CHECK: i32 1, label %[[VAL_73:.*]]
+// CHECK: i32 2, label %[[VAL_74:.*]]
+// CHECK: ]
+// CHECK: reduce.switch.atomic: ; preds = %[[VAL_67]]
+// CHECK: unreachable
+// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_67]]
+// CHECK: %[[VAL_75:.*]] = load ptr, ptr %[[VAL_20]], align 8
+// CHECK: %[[VAL_76:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_77:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_78:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_13]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_78]], ptr %[[VAL_77]], align 8
+// CHECK: %[[VAL_79:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_75]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_79]], ptr %[[VAL_76]], align 8
+// CHECK: %[[VAL_80:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_77]], i32 0, i32 0
+// CHECK: %[[VAL_81:.*]] = load ptr, ptr %[[VAL_80]], align 8
+// CHECK: %[[VAL_82:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_76]], i32 0, i32 0
+// CHECK: %[[VAL_83:.*]] = load ptr, ptr %[[VAL_82]], align 8
+// CHECK: %[[VAL_84:.*]] = load i32, ptr %[[VAL_81]], align 4
+// CHECK: %[[VAL_85:.*]] = load i32, ptr %[[VAL_83]], align 4
+// CHECK: %[[VAL_86:.*]] = add i32 %[[VAL_84]], %[[VAL_85]]
+// CHECK: store i32 %[[VAL_86]], ptr %[[VAL_81]], align 4
+// CHECK: %[[VAL_87:.*]] = load ptr, ptr %[[VAL_21]], align 8
+// CHECK: %[[VAL_88:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_89:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_90:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_15]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_90]], ptr %[[VAL_89]], align 8
+// CHECK: %[[VAL_91:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_87]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_91]], ptr %[[VAL_88]], align 8
+// CHECK: %[[VAL_92:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_89]], i32 0, i32 0
+// CHECK: %[[VAL_93:.*]] = load ptr, ptr %[[VAL_92]], align 8
+// CHECK: %[[VAL_94:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_88]], i32 0, i32 0
+// CHECK: %[[VAL_95:.*]] = load ptr, ptr %[[VAL_94]], align 8
+// CHECK: %[[VAL_96:.*]] = load i32, ptr %[[VAL_93]], align 4
+// CHECK: %[[VAL_97:.*]] = load i32, ptr %[[VAL_95]], align 4
+// CHECK: %[[VAL_98:.*]] = add i32 %[[VAL_96]], %[[VAL_97]]
+// CHECK: store i32 %[[VAL_98]], ptr %[[VAL_93]], align 4
+// CHECK: call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_70]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK: br label %[[VAL_72]]
+// CHECK: reduce.finalize: ; preds = %[[VAL_73]], %[[VAL_67]]
+// CHECK: br label %[[VAL_99:.*]]
+// CHECK: omp.par.pre_finalize: ; preds = %[[VAL_72]]
+// CHECK: %[[VAL_100:.*]] = load ptr, ptr %[[VAL_20]], align 8
+// CHECK: br label %[[VAL_101:.*]]
+// CHECK: omp.reduction.cleanup: ; preds = %[[VAL_99]]
+// CHECK: %[[VAL_102:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_103:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_100]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_103]], ptr %[[VAL_102]], align 8
+// CHECK: %[[VAL_104:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_102]], i32 0, i32 0
+// CHECK: %[[VAL_105:.*]] = load ptr, ptr %[[VAL_104]], align 8
+// CHECK: %[[VAL_106:.*]] = ptrtoint ptr %[[VAL_105]] to i64
+// CHECK: %[[VAL_107:.*]] = icmp ne i64 %[[VAL_106]], 0
+// CHECK: br i1 %[[VAL_107]], label %[[VAL_108:.*]], label %[[VAL_109:.*]]
+// CHECK: omp.reduction.cleanup14: ; preds = %[[VAL_108]], %[[VAL_101]]
+// CHECK: br label %[[VAL_110:.*]]
+// CHECK: omp.region.cont12: ; preds = %[[VAL_109]]
+// CHECK: %[[VAL_111:.*]] = load ptr, ptr %[[VAL_21]], align 8
+// CHECK: br label %[[VAL_112:.*]]
+// CHECK: omp.reduction.cleanup16: ; preds = %[[VAL_110]]
+// CHECK: %[[VAL_113:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8
+// CHECK: %[[VAL_114:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_111]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_114]], ptr %[[VAL_113]], align 8
+// CHECK: %[[VAL_115:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_113]], i32 0, i32 0
+// CHECK: %[[VAL_116:.*]] = load ptr, ptr %[[VAL_115]], align 8
+// CHECK: %[[VAL_117:.*]] = ptrtoint ptr %[[VAL_116]] to i64
+// CHECK: %[[VAL_118:.*]] = icmp ne i64 %[[VAL_117]], 0
+// CHECK: br i1 %[[VAL_118]], label %[[VAL_119:.*]], label %[[VAL_120:.*]]
+// CHECK: omp.reduction.cleanup18: ; preds = %[[VAL_119]], %[[VAL_112]]
+// CHECK: br label %[[VAL_121:.*]]
+// CHECK: omp.region.cont15: ; preds = %[[VAL_120]]
+// CHECK: br label %[[VAL_122:.*]]
+// CHECK: omp.reduction.cleanup17: ; preds = %[[VAL_112]]
+// CHECK: br label %[[VAL_120]]
+// CHECK: omp.reduction.cleanup13: ; preds = %[[VAL_101]]
+// CHECK: br label %[[VAL_109]]
+// CHECK: omp.reduction.neutral6: ; preds = %[[VAL_41]]
+// CHECK: %[[VAL_123:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 2, i8 0 }, ptr %[[VAL_48]], 0
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_123]], ptr %[[VAL_43]], align 8
+// CHECK: %[[VAL_124:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_43]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_124]], ptr %[[VAL_46]], align 8
+// CHECK: br label %[[VAL_54]]
+// CHECK: omp.reduction.neutral1: ; preds = %[[VAL_25]]
+// CHECK: %[[VAL_125:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 2, i8 0 }, ptr %[[VAL_32]], 0
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_125]], ptr %[[VAL_27]], align 8
+// CHECK: %[[VAL_126:.*]] = load { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[VAL_27]], align 8
+// CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[VAL_126]], ptr %[[VAL_30]], align 8
+// CHECK: br label %[[VAL_38]]
+// CHECK: omp.par.outlined.exit.exitStub: ; preds = %[[VAL_121]]
+// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
index 361905f7cddeb..0f757de39a006 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
@@ -61,10 +61,10 @@ module {
// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8
-// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8
// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8
// CHECK: br label %[[INIT_LABEL:.*]]
// CHECK: [[INIT_LABEL]]:
+// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8
// CHECK: store ptr %[[VAL_13]], ptr %[[VAL_21]], align 8
// CHECK: %[[VAL_22:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_15]], align 8
// CHECK: store ptr %[[VAL_15]], ptr %[[VAL_23]], align 8
>From 5b39edff5d21242e3af64d863ac68cdd6b458f29 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 19 Jun 2024 13:00:44 +0000
Subject: [PATCH 2/3] Add braces
---
.../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9fe63a9655be2..7793d5da952ef 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -390,15 +390,16 @@ static LogicalResult inlineConvertOmpRegions(
if (potentialTerminator && potentialTerminator->isTerminator()) {
llvm::BasicBlock *block = builder.GetInsertBlock();
- if (block->empty())
+ if (block->empty()) {
// this can happen for really simple reduction init regions e.g.
// %0 = llvm.mlir.constant(0 : i32) : i32
// omp.yield(%0 : i32)
// because the llvm.mlir.constant (MLIR op) isn't converted into any
// llvm op
potentialTerminator->insertInto(block, block->begin());
- else
+ } else {
potentialTerminator->insertAfter(&block->back());
+ }
}
return success();
>From 2be9ad26b63872eea8610f575ec13b4956be04af Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 19 Jun 2024 13:13:37 +0000
Subject: [PATCH 3/3] canonicalize and cse test
---
.../openmp-parallel-reduction-multiblock.mlir | 194 ++++++++----------
1 file changed, 86 insertions(+), 108 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
index 00020bd4c9d1e..4952b15287f81 100644
--- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
@@ -13,142 +13,120 @@
omp.declare_reduction @add_reduction_byref_box_heap_i32 : !llvm.ptr init {
^bb0(%arg0: !llvm.ptr):
- %0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %2 = llvm.mlir.constant(1 : i32) : i32
- %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %4 = llvm.mlir.constant(1 : i32) : i32
- %5 = llvm.alloca %4 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %6 = llvm.mlir.constant(0 : i64) : i64
- %7 = llvm.mlir.constant(0 : i32) : i32
- %8 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %8, %5 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %9 = llvm.mlir.constant(1 : i64) : i64
- %10 = llvm.alloca %9 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i64) -> !llvm.ptr
- %11 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %12 = llvm.load %11 : !llvm.ptr -> !llvm.ptr
- %13 = llvm.ptrtoint %12 : !llvm.ptr to i64
- %14 = llvm.icmp "eq" %13, %6 : i64
- llvm.cond_br %14, ^bb1, ^bb2
+ %0 = llvm.mlir.constant(2 : i32) : i32
+ %1 = llvm.mlir.constant(20180515 : i32) : i32
+ %2 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %3 = llvm.mlir.zero : !llvm.ptr
+ %4 = llvm.mlir.constant(9 : i32) : i32
+ %5 = llvm.mlir.constant(1 : i64) : i64
+ %6 = llvm.mlir.constant(0 : i32) : i32
+ %7 = llvm.mlir.constant(0 : i64) : i64
+ %8 = llvm.mlir.constant(1 : i32) : i32
+ %9 = llvm.alloca %8 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %10 = llvm.alloca %8 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %11 = llvm.alloca %8 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %12 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %12, %11 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %13 = llvm.alloca %5 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i64) -> !llvm.ptr
+ %14 = llvm.getelementptr %11[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %15 = llvm.load %14 : !llvm.ptr -> !llvm.ptr
+ %16 = llvm.ptrtoint %15 : !llvm.ptr to i64
+ %17 = llvm.icmp "eq" %16, %7 : i64
+ llvm.cond_br %17, ^bb1, ^bb2
^bb1: // pred: ^bb0
- %15 = llvm.mlir.constant(9 : i32) : i32
- %16 = llvm.mlir.zero : !llvm.ptr
- %17 = llvm.getelementptr %16[1] : (!llvm.ptr) -> !llvm.ptr, i32
- %18 = llvm.ptrtoint %17 : !llvm.ptr to i64
- %19 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %20 = llvm.insertvalue %18, %19[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %21 = llvm.mlir.constant(20180515 : i32) : i32
- %22 = llvm.insertvalue %21, %20[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %23 = llvm.mlir.constant(0 : i32) : i32
- %24 = llvm.trunc %23 : i32 to i8
- %25 = llvm.insertvalue %24, %22[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %26 = llvm.trunc %15 : i32 to i8
- %27 = llvm.insertvalue %26, %25[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %28 = llvm.mlir.constant(2 : i32) : i32
- %29 = llvm.trunc %28 : i32 to i8
- %30 = llvm.insertvalue %29, %27[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %31 = llvm.mlir.constant(0 : i32) : i32
- %32 = llvm.trunc %31 : i32 to i8
- %33 = llvm.insertvalue %32, %30[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %34 = llvm.insertvalue %12, %33[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %34, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %35 = llvm.load %3 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %35, %10 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %18 = llvm.getelementptr %3[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ %19 = llvm.ptrtoint %18 : !llvm.ptr to i64
+ %20 = llvm.insertvalue %19, %2[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %21 = llvm.insertvalue %1, %20[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %22 = llvm.trunc %6 : i32 to i8
+ %23 = llvm.insertvalue %22, %21[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %24 = llvm.trunc %4 : i32 to i8
+ %25 = llvm.insertvalue %24, %23[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %26 = llvm.trunc %0 : i32 to i8
+ %27 = llvm.insertvalue %26, %25[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %28 = llvm.insertvalue %22, %27[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %29 = llvm.insertvalue %15, %28[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %29, %10 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %30 = llvm.load %10 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %30, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
llvm.br ^bb3
^bb2: // pred: ^bb0
- %36 = llvm.mlir.zero : !llvm.ptr
- %37 = llvm.getelementptr %36[1] : (!llvm.ptr) -> !llvm.ptr, i32
- %38 = llvm.ptrtoint %37 : !llvm.ptr to i64
- //%39 = llvm.call @malloc(%38) {in_type = i32, operandSegmentSizes = array<i32: 0, 0>} : (i64) -> !llvm.ptr
- %39 = llvm.mlir.zero : !llvm.ptr
- llvm.store %7, %39 : i32, !llvm.ptr
- %40 = llvm.mlir.constant(9 : i32) : i32
- %41 = llvm.mlir.zero : !llvm.ptr
- %42 = llvm.getelementptr %41[1] : (!llvm.ptr) -> !llvm.ptr, i32
- %43 = llvm.ptrtoint %42 : !llvm.ptr to i64
- %44 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %45 = llvm.insertvalue %43, %44[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %46 = llvm.mlir.constant(20180515 : i32) : i32
- %47 = llvm.insertvalue %46, %45[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %48 = llvm.mlir.constant(0 : i32) : i32
- %49 = llvm.trunc %48 : i32 to i8
- %50 = llvm.insertvalue %49, %47[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %51 = llvm.trunc %40 : i32 to i8
- %52 = llvm.insertvalue %51, %50[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %53 = llvm.mlir.constant(2 : i32) : i32
- %54 = llvm.trunc %53 : i32 to i8
- %55 = llvm.insertvalue %54, %52[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %56 = llvm.mlir.constant(0 : i32) : i32
- %57 = llvm.trunc %56 : i32 to i8
- %58 = llvm.insertvalue %57, %55[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %59 = llvm.insertvalue %39, %58[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %59, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %60 = llvm.load %1 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %60, %10 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %31 = llvm.getelementptr %3[1] : (!llvm.ptr) -> !llvm.ptr, i32
+ llvm.store %6, %3 : i32, !llvm.ptr
+ %32 = llvm.ptrtoint %31 : !llvm.ptr to i64
+ %33 = llvm.insertvalue %32, %2[1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %34 = llvm.insertvalue %1, %33[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %35 = llvm.trunc %6 : i32 to i8
+ %36 = llvm.insertvalue %35, %34[3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %37 = llvm.trunc %4 : i32 to i8
+ %38 = llvm.insertvalue %37, %36[4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %39 = llvm.trunc %0 : i32 to i8
+ %40 = llvm.insertvalue %39, %38[5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %41 = llvm.insertvalue %35, %40[6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %42 = llvm.insertvalue %3, %41[0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %42, %9 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %43 = llvm.load %9 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %43, %13 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
llvm.br ^bb3
^bb3: // 2 preds: ^bb1, ^bb2
- omp.yield(%10 : !llvm.ptr)
+ omp.yield(%13 : !llvm.ptr)
} combiner {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %2 = llvm.mlir.constant(1 : i32) : i32
- %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %4 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %4, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %5 = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %5, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %6 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %7 = llvm.load %6 : !llvm.ptr -> !llvm.ptr
- %8 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %9 = llvm.load %8 : !llvm.ptr -> !llvm.ptr
- %10 = llvm.load %7 : !llvm.ptr -> i32
- %11 = llvm.load %9 : !llvm.ptr -> i32
- %12 = llvm.add %10, %11 : i32
- llvm.store %12, %7 : i32, !llvm.ptr
+ %2 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %3 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %3, %2 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %4 = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %4, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %5 = llvm.getelementptr %2[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %6 = llvm.load %5 : !llvm.ptr -> !llvm.ptr
+ %7 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %8 = llvm.load %7 : !llvm.ptr -> !llvm.ptr
+ %9 = llvm.load %6 : !llvm.ptr -> i32
+ %10 = llvm.load %8 : !llvm.ptr -> i32
+ %11 = llvm.add %9, %10 : i32
+ llvm.store %11, %6 : i32, !llvm.ptr
omp.yield(%arg0 : !llvm.ptr)
} cleanup {
^bb0(%arg0: !llvm.ptr):
- %0 = llvm.mlir.constant(1 : i32) : i32
- %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %2 = llvm.mlir.constant(0 : i64) : i64
+ %0 = llvm.mlir.constant(0 : i64) : i64
+ %1 = llvm.mlir.constant(1 : i32) : i32
+ %2 = llvm.alloca %1 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
%3 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %3, %1 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %4 = llvm.getelementptr %1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %3, %2 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %4 = llvm.getelementptr %2[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
%5 = llvm.load %4 : !llvm.ptr -> !llvm.ptr
%6 = llvm.ptrtoint %5 : !llvm.ptr to i64
- %7 = llvm.icmp "ne" %6, %2 : i64
+ %7 = llvm.icmp "ne" %6, %0 : i64
llvm.cond_br %7, ^bb1, ^bb2
^bb1: // pred: ^bb0
- //llvm.call @free(%5) : (!llvm.ptr) -> ()
llvm.br ^bb2
^bb2: // 2 preds: ^bb0, ^bb1
omp.yield
}
llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPmissordered_blocks", frame_pointer = #llvm.framePointerKind<"non-leaf">, target_cpu = "generic", target_features = #llvm.target_features<["+outline-atomics", "+v8a", "+fp-armv8", "+neon"]>} {
- %0 = llvm.mlir.constant(24 : i32) : i32
- %1 = llvm.mlir.constant(42 : i32) : i32
+ %0 = llvm.mlir.constant(1 : i32) : i32
+ %1 = llvm.mlir.constant(24 : i32) : i32
+ %2 = llvm.mlir.constant(42 : i32) : i32
omp.parallel reduction(byref @add_reduction_byref_box_heap_i32 %arg0 -> %arg2 : !llvm.ptr, byref @add_reduction_byref_box_heap_i32 %arg1 -> %arg3 : !llvm.ptr) {
- %2 = llvm.mlir.constant(1 : i32) : i32
- %3 = llvm.alloca %2 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %4 = llvm.mlir.constant(1 : i32) : i32
- %5 = llvm.alloca %4 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
- %6 = llvm.load %arg2 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %6, %5 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %7 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %8 = llvm.load %7 : !llvm.ptr -> !llvm.ptr
- llvm.store %1, %8 : i32, !llvm.ptr
- %9 = llvm.load %arg3 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- llvm.store %9, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
- %10 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
- %11 = llvm.load %10 : !llvm.ptr -> !llvm.ptr
- llvm.store %0, %11 : i32, !llvm.ptr
+ %3 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %4 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+ %5 = llvm.load %arg2 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %5, %4 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %6 = llvm.getelementptr %4[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %7 = llvm.load %6 : !llvm.ptr -> !llvm.ptr
+ llvm.store %2, %7 : i32, !llvm.ptr
+ %8 = llvm.load %arg3 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ llvm.store %8, %3 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr
+ %9 = llvm.getelementptr %3[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>
+ %10 = llvm.load %9 : !llvm.ptr -> !llvm.ptr
+ llvm.store %1, %10 : i32, !llvm.ptr
omp.terminator
}
llvm.return
}
-
// CHECK: %[[VAL_0:.*]] = alloca { ptr, ptr }, align 8
// CHECK: br label %[[VAL_1:.*]]
// CHECK: entry: ; preds = %[[VAL_2:.*]]
More information about the Mlir-commits
mailing list