[llvm] 9d81ccc - [WebAssembly] Enable loop unrolling
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 10 00:26:48 PST 2021
Author: Sam Parker
Date: 2021-02-10T08:25:46Z
New Revision: 9d81ccc02ffb154cd5ee7ade21740dc4a45f4261
URL: https://github.com/llvm/llvm-project/commit/9d81ccc02ffb154cd5ee7ade21740dc4a45f4261
DIFF: https://github.com/llvm/llvm-project/commit/9d81ccc02ffb154cd5ee7ade21740dc4a45f4261.diff
LOG: [WebAssembly] Enable loop unrolling
Enable partial and runtime unrolling with a threshold of 30, which
was derived from a large number of kernels running on node and
wasmtime for amd64 and aarch64.
Unrolling is enabled by default at -O2 and -O3 and is disabled at
-Oz and -Os. Compiling with -Os is recommended if the wasm binary
size is the most important factor.
Differential Revision: https://reviews.llvm.org/D95125
Added:
llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
llvm/test/Transforms/LoopUnroll/WebAssembly/lit.local.cfg
Modified:
llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index be1cfbaef3e43..a2111afbfcbaf 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -102,3 +102,29 @@ bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
return (CallerBits & CalleeBits) == CalleeBits;
}
+
+void WebAssemblyTTIImpl::getUnrollingPreferences(
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) const {
+ // Scan the loop: don't unroll loops with calls. This is a standard approach
+ // for most (all?) targets.
+ for (BasicBlock *BB : L->blocks())
+ for (Instruction &I : *BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (const Function *F = cast<CallBase>(I).getCalledFunction())
+ if (isLoweredToCall(F))
+ return;
+
+ // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
+ // the various microarchitectures that use the BasicTTI implementation and
+ // has been selected through heuristics across multiple cores and runtimes.
+ UP.Partial = UP.Runtime = UP.UpperBound = true;
+ UP.PartialThreshold = 30;
+
+ // Avoid unrolling when optimizing for size.
+ UP.OptSizeThreshold = 0;
+ UP.PartialOptSizeThreshold = 0;
+
+ // Set number of instructions optimized when "back edge"
+ // becomes "fall through" to default value of 2.
+ UP.BEInsns = 2;
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 41e358c159b48..3515a3e149d1e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -48,6 +48,9 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP) const;
+
/// @}
/// \name Vector TTI Implementations
diff --git a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
new file mode 100644
index 0000000000000..8a02181734dba
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
@@ -0,0 +1,303 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=wasm32-unknown-unknown -loop-unroll -simplifycfg -instcombine -S %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+
+define hidden void @compile_time_full(i8* nocapture %a, i8* nocapture readonly %b) {
+; CHECK-LABEL: @compile_time_full(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[B:%.*]], align 1
+; CHECK-NEXT: store i8 [[I]], i8* [[A:%.*]], align 1
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 1
+; CHECK-NEXT: [[I_1:%.*]] = load i8, i8* [[ARRAYIDX_1]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 1
+; CHECK-NEXT: store i8 [[I_1]], i8* [[ARRAYIDX1_1]], align 1
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 2
+; CHECK-NEXT: [[I_2:%.*]] = load i8, i8* [[ARRAYIDX_2]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_2:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 2
+; CHECK-NEXT: store i8 [[I_2]], i8* [[ARRAYIDX1_2]], align 1
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 3
+; CHECK-NEXT: [[I_3:%.*]] = load i8, i8* [[ARRAYIDX_3]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_3:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 3
+; CHECK-NEXT: store i8 [[I_3]], i8* [[ARRAYIDX1_3]], align 1
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 4
+; CHECK-NEXT: [[I_4:%.*]] = load i8, i8* [[ARRAYIDX_4]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_4:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 4
+; CHECK-NEXT: store i8 [[I_4]], i8* [[ARRAYIDX1_4]], align 1
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 5
+; CHECK-NEXT: [[I_5:%.*]] = load i8, i8* [[ARRAYIDX_5]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_5:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 5
+; CHECK-NEXT: store i8 [[I_5]], i8* [[ARRAYIDX1_5]], align 1
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 6
+; CHECK-NEXT: [[I_6:%.*]] = load i8, i8* [[ARRAYIDX_6]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_6:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 6
+; CHECK-NEXT: store i8 [[I_6]], i8* [[ARRAYIDX1_6]], align 1
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 7
+; CHECK-NEXT: [[I_7:%.*]] = load i8, i8* [[ARRAYIDX_7]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_7:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 7
+; CHECK-NEXT: store i8 [[I_7]], i8* [[ARRAYIDX1_7]], align 1
+; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 8
+; CHECK-NEXT: [[I_8:%.*]] = load i8, i8* [[ARRAYIDX_8]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_8:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 8
+; CHECK-NEXT: store i8 [[I_8]], i8* [[ARRAYIDX1_8]], align 1
+; CHECK-NEXT: [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 9
+; CHECK-NEXT: [[I_9:%.*]] = load i8, i8* [[ARRAYIDX_9]], align 1
+; CHECK-NEXT: [[ARRAYIDX1_9:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 9
+; CHECK-NEXT: store i8 [[I_9]], i8* [[ARRAYIDX1_9]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8, i8* %b, i32 %i.06
+ %i = load i8, i8* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, i8* %a, i32 %i.06
+ store i8 %i, i8* %arrayidx1, align 1
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, 10
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden void @compile_time_partial(i16* nocapture %a, i16* nocapture readonly %b) {
+; CHECK-LABEL: @compile_time_partial(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[B:%.*]], i32 [[I_07]]
+; CHECK-NEXT: [[I:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[I]], 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[I_07]]
+; CHECK-NEXT: store i16 [[ADD]], i16* [[ARRAYIDX2]], align 2
+; CHECK-NEXT: [[INC:%.*]] = or i32 [[I_07]], 1
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[INC]]
+; CHECK-NEXT: [[I_1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
+; CHECK-NEXT: [[ADD_1:%.*]] = add i16 [[I_1]], 1
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[INC]]
+; CHECK-NEXT: store i16 [[ADD_1]], i16* [[ARRAYIDX2_1]], align 2
+; CHECK-NEXT: [[INC_1:%.*]] = or i32 [[I_07]], 2
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[INC_1]]
+; CHECK-NEXT: [[I_2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
+; CHECK-NEXT: [[ADD_2:%.*]] = add i16 [[I_2]], 1
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[INC_1]]
+; CHECK-NEXT: store i16 [[ADD_2]], i16* [[ARRAYIDX2_2]], align 2
+; CHECK-NEXT: [[INC_2:%.*]] = or i32 [[I_07]], 3
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[B]], i32 [[INC_2]]
+; CHECK-NEXT: [[I_3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
+; CHECK-NEXT: [[ADD_3:%.*]] = add i16 [[I_3]], 1
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i32 [[INC_2]]
+; CHECK-NEXT: store i16 [[ADD_3]], i16* [[ARRAYIDX2_3]], align 2
+; CHECK-NEXT: [[INC_3]] = add nuw nsw i32 [[I_07]], 4
+; CHECK-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i32 [[INC_3]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND_NOT_3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i16, i16* %b, i32 %i.07
+ %i = load i16, i16* %arrayidx, align 2
+ %add = add i16 %i, 1
+ %arrayidx2 = getelementptr inbounds i16, i16* %a, i32 %i.07
+ store i16 %add, i16* %arrayidx2, align 2
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond.not = icmp eq i32 %inc, 1000
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden void @runtime(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+; CHECK-LABEL: @runtime(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP8_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP8_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[N]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[N]], 1
+; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK: for.body.preheader.new:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[N]], -2
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit.unr-lcssa:
+; CHECK-NEXT: [[I_09_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL:%.*]]
+; CHECK: for.body.epil:
+; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_09_UNR]]
+; CHECK-NEXT: [[I_EPIL:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ARRAYIDX1_EPIL:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[I_09_UNR]]
+; CHECK-NEXT: [[I1_EPIL:%.*]] = load i32, i32* [[ARRAYIDX1_EPIL]], align 4
+; CHECK-NEXT: [[MUL_EPIL:%.*]] = mul nsw i32 [[I1_EPIL]], [[I_EPIL]]
+; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_09_UNR]]
+; CHECK-NEXT: store i32 [[MUL_EPIL]], i32* [[ARRAYIDX2_EPIL]], align 4
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NSUB_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]]
+; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_09]]
+; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I1]], [[I]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[INC:%.*]] = or i32 [[I_09]], 1
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INC]]
+; CHECK-NEXT: [[I_1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[INC]]
+; CHECK-NEXT: [[I1_1:%.*]] = load i32, i32* [[ARRAYIDX1_1]], align 4
+; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[I1_1]], [[I_1]]
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INC]]
+; CHECK-NEXT: store i32 [[MUL_1]], i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[INC_1]] = add nuw i32 [[I_09]], 2
+; CHECK-NEXT: [[NITER_NSUB_1]] = add i32 [[NITER]], -2
+; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NSUB_1]], 0
+; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
+;
+entry:
+ %cmp8.not = icmp eq i32 %N, 0
+ br i1 %cmp8.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+ %i = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+ %i1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %i1, %i
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+ store i32 %mul, i32* %arrayidx2, align 4
+ %inc = add nuw i32 %i.09, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden void @dont_unroll_call(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+; CHECK-LABEL: @dont_unroll_call(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP12_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP12_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_013]]
+; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i32 [[I_013]]
+; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I1]], [[I]]
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_013]]
+; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
+; CHECK-NEXT: call void (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 [[I_013]], i32 [[MUL]])
+; CHECK-NEXT: [[INC]] = add nuw i32 [[I_013]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
+;
+entry:
+ %cmp12.not = icmp eq i32 %N, 0
+ br i1 %cmp12.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.013
+ %i = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.013
+ %i1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %i1, %i
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.013
+ store i32 %mul, i32* %arrayidx2, align 4
+ call void (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 %i.013, i32 %mul)
+ %inc = add nuw i32 %i.013, 1
+ %exitcond.not = icmp eq i32 %inc, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden void @dont_unroll_optsize(i8* nocapture %a, i8* nocapture readonly %b) #0 {
+; CHECK-LABEL: @dont_unroll_optsize(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[I_06]]
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[I_06]]
+; CHECK-NEXT: store i8 [[I]], i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 10
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8, i8* %b, i32 %i.06
+ %i = load i8, i8* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, i8* %a, i32 %i.06
+ store i8 %i, i8* %arrayidx1, align 1
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, 10
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define hidden void @dont_unroll_minsize(i8* nocapture %a, i8* nocapture readonly %b) #1 {
+; CHECK-LABEL: @dont_unroll_minsize(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i32 [[I_06]]
+; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 [[I_06]]
+; CHECK-NEXT: store i8 [[I]], i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 10
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i8, i8* %b, i32 %i.06
+ %i = load i8, i8* %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, i8* %a, i32 %i.06
+ store i8 %i, i8* %arrayidx1, align 1
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond.not = icmp eq i32 %inc, 10
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { optsize }
+attributes #1 = { minsize }
+
+ at .str = private unnamed_addr constant [12 x i8] c"a[%d] = %d\0A\00", align 1
+declare void @printf(i8* nocapture readonly, ...)
diff --git a/llvm/test/Transforms/LoopUnroll/WebAssembly/lit.local.cfg b/llvm/test/Transforms/LoopUnroll/WebAssembly/lit.local.cfg
new file mode 100644
index 0000000000000..743473517cd0f
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/WebAssembly/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'WebAssembly' in config.root.targets:
+ config.unsupported = True
More information about the llvm-commits
mailing list