[llvm] 2daf117 - [SLP] Add some tests that require memory runtime checks.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 24 01:46:16 PDT 2021


Author: Florian Hahn
Date: 2021-06-24T09:19:28+01:00
New Revision: 2daf11749235f91bd159d62133a4b2adb09dedcb

URL: https://github.com/llvm/llvm-project/commit/2daf11749235f91bd159d62133a4b2adb09dedcb
DIFF: https://github.com/llvm/llvm-project/commit/2daf11749235f91bd159d62133a4b2adb09dedcb.diff

LOG: [SLP] Add some tests that require memory runtime checks.
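For context: most of the functions below interleave loads from %src with stores to %dst, so SLP-vectorizing them would require hoisting the later loads above the earlier stores, which is only safe when the two pointers are known not to overlap — hence the need for memory runtime checks (versioning). A hand-written C-level sketch of the pattern exercised by needs_versioning_profitable, as an illustration only (the IR tests were not generated from this source):

    /* Illustration only. With plain int* arguments there is no no-alias
       guarantee on dst and src, so vectorizing these four statements would
       need a runtime check that the two ranges do not overlap. */
    void needs_versioning_profitable(int *dst, int *src) {
      dst[0] = src[0] >> 16;
      dst[1] = src[1] >> 16;
      dst[2] = src[2] >> 16;
      dst[3] = src[3] >> 16;
    }

By contrast, the no_version test performs both loads before any store, so its CHECK lines already show it vectorized without any runtime check.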

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
    llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
new file mode 100644
index 0000000000000..3ecb0f20cbbd4
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
@@ -0,0 +1,354 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -scoped-noalias-aa -slp-vectorizer -mtriple=arm64-apple-darwin -enable-new-pm=false -S %s | FileCheck %s
+
+define void @needs_versioning_not_profitable(i32* %dst, i32* %src) {
+; CHECK-LABEL: @needs_versioning_not_profitable(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
+; CHECK-NEXT:    [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
+; CHECK-NEXT:    store i32 [[R_0]], i32* [[DST:%.*]], align 4
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
+; CHECK-NEXT:    store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src.0 = load i32, i32* %src, align 4
+  %r.0 = ashr i32 %src.0, 16
+  store i32 %r.0, i32* %dst, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  ret void
+}
+
+define void @needs_versioning_profitable(i32* %dst, i32* %src) {
+; CHECK-LABEL: @needs_versioning_profitable(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
+; CHECK-NEXT:    [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
+; CHECK-NEXT:    store i32 [[R_0]], i32* [[DST:%.*]], align 4
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
+; CHECK-NEXT:    store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
+; CHECK-NEXT:    [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
+; CHECK-NEXT:    [[SRC_2:%.*]] = load i32, i32* [[SRC_GEP_2]], align 4
+; CHECK-NEXT:    [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
+; CHECK-NEXT:    [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
+; CHECK-NEXT:    store i32 [[R_2]], i32* [[DST_GEP_2]], align 4
+; CHECK-NEXT:    [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
+; CHECK-NEXT:    [[SRC_3:%.*]] = load i32, i32* [[SRC_GEP_3]], align 4
+; CHECK-NEXT:    [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
+; CHECK-NEXT:    [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
+; CHECK-NEXT:    store i32 [[R_3]], i32* [[DST_GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src.0 = load i32, i32* %src, align 4
+  %r.0 = ashr i32 %src.0, 16
+  store i32 %r.0, i32* %dst, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  %src.gep.2 = getelementptr inbounds i32, i32* %src, i64 2
+  %src.2 = load i32, i32* %src.gep.2, align 4
+  %r.2 = ashr i32 %src.2, 16
+  %dst.gep.2 = getelementptr inbounds i32, i32* %dst, i64 2
+  store i32 %r.2, i32* %dst.gep.2, align 4
+  %src.gep.3 = getelementptr inbounds i32, i32* %src, i64 3
+  %src.3 = load i32, i32* %src.gep.3, align 4
+  %r.3 = ashr i32 %src.3, 16
+  %dst.gep.3 = getelementptr inbounds i32, i32* %dst, i64 3
+  store i32 %r.3, i32* %dst.gep.3, align 4
+
+  ret void
+}
+
+
+define void @no_version(i32* nocapture %dst, i32* nocapture readonly %src) {
+; CHECK-LABEL: @no_version(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SRC]] to <2 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[DST]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src.0 = load i32, i32* %src, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.0 = ashr i32 %src.0, 16
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.0, i32* %dst, align 4
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  ret void
+}
+
+define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly %counter) {
+; CHECK-LABEL: @version_multiple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[COUNTER:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK:%.*]], align 4
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store i32 [[XOR]], i32* [[OUT_BLOCK]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    store i32 [[XOR_1]], i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[XOR_2:%.*]] = xor i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    store i32 [[XOR_2]], i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[XOR_3:%.*]] = xor i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    store i32 [[XOR_3]], i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %counter, align 4
+  %1 = load i32, i32* %out_block, align 4
+  %xor = xor i32 %1, %0
+  store i32 %xor, i32* %out_block, align 4
+  %arrayidx.1 = getelementptr inbounds i32, i32* %counter, i64 1
+  %2 = load i32, i32* %arrayidx.1, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, i32* %out_block, i64 1
+  %3 = load i32, i32* %arrayidx2.1, align 4
+  %xor.1 = xor i32 %3, %2
+  store i32 %xor.1, i32* %arrayidx2.1, align 4
+  %arrayidx.2 = getelementptr inbounds i32, i32* %counter, i64 2
+  %4 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, i32* %out_block, i64 2
+  %5 = load i32, i32* %arrayidx2.2, align 4
+  %xor.2 = xor i32 %5, %4
+  store i32 %xor.2, i32* %arrayidx2.2, align 4
+  %arrayidx.3 = getelementptr inbounds i32, i32* %counter, i64 3
+  %6 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, i32* %out_block, i64 3
+  %7 = load i32, i32* %arrayidx2.3, align 4
+  %xor.3 = xor i32 %7, %6
+  store i32 %xor.3, i32* %arrayidx2.3, align 4
+  ret void
+}
+
+define i32 @use_outside_version_bb(i32* %dst, i32* %src, i1 %c.1) {
+; CHECK-LABEL: @use_outside_version_bb(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
+; CHECK-NEXT:    [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
+; CHECK-NEXT:    store i32 [[R_0]], i32* [[DST:%.*]], align 4
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
+; CHECK-NEXT:    store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[R_0]]
+;
+entry:
+  %src.0 = load i32, i32* %src, align 4
+  %r.0 = ashr i32 %src.0, 16
+  store i32 %r.0, i32* %dst, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  br label %exit
+
+exit:
+  ret i32 %r.0
+}
+
+define i32 @value_used_in_return(i32* %dst, i32* %src, i32 %x) {
+; CHECK-LABEL: @value_used_in_return(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
+; CHECK-NEXT:    [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
+; CHECK-NEXT:    store i32 [[R_0]], i32* [[DST:%.*]], align 4
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
+; CHECK-NEXT:    store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], 20
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+entry:
+  %src.0 = load i32, i32* %src, align 4
+  %r.0 = ashr i32 %src.0, 16
+  store i32 %r.0, i32* %dst, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  %add = add i32 %x, 20
+  ret i32 %add
+}
+define i32 @needs_versioning2_cond_br(i32* %dst, i32* %src, i1 %c.1) {
+; CHECK-LABEL: @needs_versioning2_cond_br(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
+; CHECK-NEXT:    [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
+; CHECK-NEXT:    store i32 [[R_0]], i32* [[DST:%.*]], align 4
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
+; CHECK-NEXT:    store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
+; CHECK-NEXT:    ret i32 10
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br i1 %c.1, label %then, label %else
+
+then:
+  %src.0 = load i32, i32* %src, align 4
+  %r.0 = ashr i32 %src.0, 16
+  store i32 %r.0, i32* %dst, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  ret i32 10
+
+
+else:
+  ret i32 0
+}
+
+define void @pointer_defined_in_bb(i32* %dst, i32** %src.p) {
+; CHECK-LABEL: @pointer_defined_in_bb(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SRC:%.*]] = load i32*, i32** [[SRC_P:%.*]], align 8
+; CHECK-NEXT:    [[SRC_0:%.*]] = load i32, i32* [[SRC]], align 4
+; CHECK-NEXT:    [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
+; CHECK-NEXT:    store i32 [[R_0]], i32* [[DST:%.*]], align 4
+; CHECK-NEXT:    [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
+; CHECK-NEXT:    [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
+; CHECK-NEXT:    [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
+; CHECK-NEXT:    store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %src = load i32*, i32** %src.p
+  %src.0 = load i32, i32* %src, align 4
+  %r.0 = ashr i32 %src.0, 16
+  store i32 %r.0, i32* %dst, align 4
+  %src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
+  %src.1 = load i32, i32* %src.gep.1, align 4
+  %r.1 = ashr i32 %src.1, 16
+  %dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
+  store i32 %r.1, i32* %dst.gep.1, align 4
+  ret void
+}
+
+define void @clobber_same_underlying_object(i32* %this) {
+; CHECK-LABEL: @clobber_same_underlying_object(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P_3:%.*]] = getelementptr inbounds i32, i32* [[THIS:%.*]], i32 3
+; CHECK-NEXT:    store i32 10, i32* [[P_3]], align 8
+; CHECK-NEXT:    tail call void @clobber()
+; CHECK-NEXT:    [[P_4:%.*]] = getelementptr inbounds i32, i32* [[THIS]], i32 4
+; CHECK-NEXT:    [[L2:%.*]] = load i32, i32* [[P_4]], align 8
+; CHECK-NEXT:    store i32 20, i32* [[P_4]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p.3 = getelementptr inbounds i32, i32* %this, i32 3
+  store i32 10, i32* %p.3, align 8
+  tail call void @clobber()
+  %p.4 = getelementptr inbounds i32, i32* %this, i32 4
+  %l2 = load i32, i32* %p.4, align 8
+  store i32 20, i32* %p.4, align 8
+  ret void
+}
+
+declare void @clobber()
+
+define void @slp_not_beneficial(i32* %A, i32* %B) {
+; CHECK-LABEL: @slp_not_beneficial(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 4
+; CHECK-NEXT:    store i32 0, i32* [[TMP]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 5
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 8
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP3]], align 8
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %A, i32 4
+  store i32 0, i32* %tmp, align 8
+  %tmp3 = getelementptr inbounds i32, i32* %A, i32 5
+  %tmp4 = getelementptr inbounds i32, i32* %B, i32 4
+  %tmp5 = load i32, i32* %tmp4, align 8
+  store i32 %tmp5, i32* %tmp3, align 8
+  ret void
+}
+
+define void @widget(double* %ptr, double* %ptr.2) {
+; CHECK-LABEL: @widget(
+; CHECK-NEXT:  bb1:
+; CHECK-NEXT:    [[TMP3:%.*]] = load double, double* null, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul double undef, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = load double, double* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd double [[TMP6]], [[TMP4]]
+; CHECK-NEXT:    store double [[TMP7]], double* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, double* [[PTR_2:%.*]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = load double, double* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul double undef, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[PTR]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd double [[TMP12]], [[TMP10]]
+; CHECK-NEXT:    store double [[TMP13]], double* [[TMP11]], align 8
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb15:
+; CHECK-NEXT:    br label [[BB15]]
+;
+bb1:                                              ; preds = %bb
+  %tmp3 = load double, double* null, align 8
+  %tmp4 = fmul double undef, %tmp3
+  %tmp5 = getelementptr inbounds double, double* %ptr, i32 0
+  %tmp6 = load double, double* %tmp5, align 8
+  %tmp7 = fadd double %tmp6, %tmp4
+  store double %tmp7, double* %tmp5, align 8
+  %tmp8 = getelementptr inbounds double, double* %ptr.2, i64 0
+  %tmp9 = load double, double* %tmp8, align 8
+  %tmp10 = fmul double undef, %tmp9
+  %tmp11 = getelementptr inbounds double, double* %ptr, i32 1
+  %tmp12 = load double, double* %tmp11, align 8
+  %tmp13 = fadd double %tmp12, %tmp10
+  store double %tmp13, double* %tmp11, align 8
+  br label %bb15
+
+bb15:                                             ; preds = %bb15, %bb14
+  br label %bb15
+}

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
new file mode 100644
index 0000000000000..d28a4df96127a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
@@ -0,0 +1,188 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -scoped-noalias-aa -slp-vectorizer -mtriple=x86_64-apple-darwin -enable-new-pm=false -S %s | FileCheck %s
+
+define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly %counter) {
+; CHECK-LABEL: @version_multiple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[COUNTER:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK:%.*]], align 4
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store i32 [[XOR]], i32* [[OUT_BLOCK]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    store i32 [[XOR_1]], i32* [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[XOR_2:%.*]] = xor i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    store i32 [[XOR_2]], i32* [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    [[XOR_3:%.*]] = xor i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    store i32 [[XOR_3]], i32* [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, i32* %counter, align 4
+  %1 = load i32, i32* %out_block, align 4
+  %xor = xor i32 %1, %0
+  store i32 %xor, i32* %out_block, align 4
+  %arrayidx.1 = getelementptr inbounds i32, i32* %counter, i64 1
+  %2 = load i32, i32* %arrayidx.1, align 4
+  %arrayidx2.1 = getelementptr inbounds i32, i32* %out_block, i64 1
+  %3 = load i32, i32* %arrayidx2.1, align 4
+  %xor.1 = xor i32 %3, %2
+  store i32 %xor.1, i32* %arrayidx2.1, align 4
+  %arrayidx.2 = getelementptr inbounds i32, i32* %counter, i64 2
+  %4 = load i32, i32* %arrayidx.2, align 4
+  %arrayidx2.2 = getelementptr inbounds i32, i32* %out_block, i64 2
+  %5 = load i32, i32* %arrayidx2.2, align 4
+  %xor.2 = xor i32 %5, %4
+  store i32 %xor.2, i32* %arrayidx2.2, align 4
+  %arrayidx.3 = getelementptr inbounds i32, i32* %counter, i64 3
+  %6 = load i32, i32* %arrayidx.3, align 4
+  %arrayidx2.3 = getelementptr inbounds i32, i32* %out_block, i64 3
+  %7 = load i32, i32* %arrayidx2.3, align 4
+  %xor.3 = xor i32 %7, %6
+  store i32 %xor.3, i32* %arrayidx2.3, align 4
+  ret void
+}
+
+declare void @use(<8 x float>)
+define void @delete_pointer_bound(float* %a, float* %b, i1 %c) #0 {
+; CHECK-LABEL: @delete_pointer_bound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float*> poison, float* [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float*> [[TMP0]], float* [[B]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, <2 x float*> [[TMP1]], <2 x i64> <i64 10, i64 14>
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP2]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[SHUFFLE]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[I71:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 5, i32 6, i32 13>
+; CHECK-NEXT:    call void @use(<8 x float> [[I71]])
+; CHECK-NEXT:    ret void
+; CHECK:       then:
+; CHECK-NEXT:    [[A_8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 8
+; CHECK-NEXT:    store float 0.000000e+00, float* [[A_8]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float*> [[TMP2]], i32 1
+; CHECK-NEXT:    [[L6:%.*]] = load float, float* [[TMP5]], align 4
+; CHECK-NEXT:    [[A_5:%.*]] = getelementptr inbounds float, float* [[A]], i64 5
+; CHECK-NEXT:    store float [[L6]], float* [[A_5]], align 4
+; CHECK-NEXT:    [[A_6:%.*]] = getelementptr inbounds float, float* [[A]], i64 6
+; CHECK-NEXT:    store float 0.000000e+00, float* [[A_6]], align 4
+; CHECK-NEXT:    [[A_7:%.*]] = getelementptr inbounds float, float* [[A]], i64 7
+; CHECK-NEXT:    store float 0.000000e+00, float* [[A_7]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %b.10 = getelementptr inbounds float, float* %b, i64 10
+  %b.14 = getelementptr inbounds float, float* %b, i64 14
+  br i1 %c, label %then, label %else
+
+else:
+  %l0 = load float, float* %b.10, align 4
+  %l1 = load float, float* %b.14, align 4
+  %i2 = insertelement <8 x float> undef, float %l0, i32 2
+  %i3 = insertelement <8 x float> %i2, float %l0, i32 3
+  %i4 = insertelement <8 x float> %i3, float %l1, i32 4
+  %i7 = insertelement <8 x float> %i4, float %l1, i32 7
+  call void @use(<8 x float> %i7)
+  ret void
+
+then:
+  %a.8 = getelementptr inbounds float, float* %a, i64 8
+  store float 0.0, float* %a.8, align 4
+  %l6 = load float, float* %b.14, align 4
+  %a.5 = getelementptr inbounds float, float* %a, i64 5
+  store float %l6, float* %a.5, align 4
+  %a.6 = getelementptr inbounds float, float* %a, i64 6
+  store float 0.0, float* %a.6, align 4
+  %a.7 = getelementptr inbounds float, float* %a, i64 7
+  store float 0.0, float* %a.7, align 4
+  ret void
+}
+
+%struct.zot = type { i16, i16, i16, i32, float, float, float, %struct.quux*, %struct.zot*, %struct.wombat*, %struct.wombat.0 }
+%struct.quux = type { i16, %struct.quux*, %struct.quux* }
+%struct.wombat = type { i32, i16, i8, i8, %struct.eggs* }
+%struct.eggs = type { float, i8, %struct.ham }
+%struct.ham = type { [2 x double], [8 x i8] }
+%struct.wombat.0 = type { %struct.bar }
+%struct.bar = type { [3 x double], [3 x double], double, double, i16, [3 x double]*, i32, [3 x double] }
+
+define double @preserve_loop_info(%struct.zot* %arg) {
+; CHECK-LABEL: @preserve_loop_info(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [3 x double], align 16
+; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
+; CHECK:       outer.header:
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    br i1 undef, label [[OUTER_LATCH:%.*]], label [[INNER]]
+; CHECK:       outer.latch:
+; CHECK-NEXT:    br i1 undef, label [[BB:%.*]], label [[OUTER_HEADER]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[TMP5:%.*]] = load [3 x double]*, [3 x double]** undef, align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[TMP]], i64 0, i64 0
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[TMP]], i64 0, i64 1
+; CHECK-NEXT:    br label [[LOOP_3HEADER:%.*]]
+; CHECK:       loop.3header:
+; CHECK-NEXT:    br i1 undef, label [[LOOP_3LATCH:%.*]], label [[BB9:%.*]]
+; CHECK:       bb9:
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x double], [3 x double]* [[TMP5]], i64 undef, i64 1
+; CHECK-NEXT:    store double undef, double* [[TMP6]], align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP10]], align 8
+; CHECK-NEXT:    store double [[TMP12]], double* [[TMP7]], align 8
+; CHECK-NEXT:    br label [[LOOP_3LATCH]]
+; CHECK:       loop.3latch:
+; CHECK-NEXT:    br i1 undef, label [[BB14:%.*]], label [[LOOP_3HEADER]]
+; CHECK:       bb14:
+; CHECK-NEXT:    [[TMP15:%.*]] = call double undef(double* [[TMP6]], %struct.zot* [[ARG:%.*]])
+; CHECK-NEXT:    ret double undef
+;
+entry:
+  %tmp = alloca [3 x double], align 16
+  br label %outer.header
+
+outer.header:                                              ; preds = %bb3, %bb
+  br label %inner
+
+inner:
+  br i1 undef, label %outer.latch, label %inner
+
+outer.latch:                                              ; preds = %bb16
+  br i1 undef, label %bb, label %outer.header
+
+bb:                                              ; preds = %bb3
+  %tmp5 = load [3 x double]*, [3 x double]** undef, align 8
+  %tmp6 = getelementptr inbounds [3 x double], [3 x double]* %tmp, i64 0, i64 0
+  %tmp7 = getelementptr inbounds [3 x double], [3 x double]* %tmp, i64 0, i64 1
+  br label %loop.3header
+
+loop.3header:                                              ; preds = %bb13, %bb4
+  br i1 undef, label %loop.3latch, label %bb9
+
+bb9:                                              ; preds = %bb8
+  %tmp10 = getelementptr inbounds [3 x double], [3 x double]* %tmp5, i64 undef, i64 1
+  store double undef, double* %tmp6, align 16
+  %tmp12 = load double, double* %tmp10, align 8
+  store double %tmp12, double* %tmp7, align 8
+  br label %loop.3latch
+
+loop.3latch:                                             ; preds = %bb11, %bb8
+  br i1 undef, label %bb14, label %loop.3header
+
+bb14:                                             ; preds = %bb13
+  %tmp15 = call double undef(double* %tmp6, %struct.zot* %arg)
+  ret double undef
+}
+
+attributes #0 = { "target-features"="+avx2" }

