[llvm] 4cbedae - [LoopVectorize][X86] Regenerate slm-no-vectorize.ll

Tue Jun 13 06:23:54 PDT 2023

Author: Simon Pilgrim
Date: 2023-06-13T14:15:37+01:00
New Revision: 4cbedaeff54b8b8e967765333e5720d3760d30a2

URL: https://github.com/llvm/llvm-project/commit/4cbedaeff54b8b8e967765333e5720d3760d30a2
DIFF: https://github.com/llvm/llvm-project/commit/4cbedaeff54b8b8e967765333e5720d3760d30a2.diff

LOG: [LoopVectorize][X86] Regenerate slm-no-vectorize.ll

Added: 
    

Modified: 
    llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll b/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
index 1b8e075fb4e4e..71697ba727dd5 100644

--- a/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll
@@ -1,16 +1,53 @@
-; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm -debug 2>&1 | FileCheck -check-prefix=MSG %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm | FileCheck %s
 ; REQUIRES: asserts
+
 ; This test should not be vectorized in X86\SLM arch
 ; Vectorizing the 64bit multiply in this case is wrong since
 ; it can be done with a lower bit mode (notice that the sources is 16bit)
 ; Also addq\subq (quad word) has a high cost on SLM arch.
 ; this test has a bad performance (regression of -70%) if vectorized on SLM arch
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define i32 @no_vec(i32 %LastIndex, ptr nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
+; CHECK-LABEL: define i32 @no_vec
+; CHECK-SAME: (i32 [[LASTINDEX:%.*]], ptr nocapture readonly [[INPUTDATA:%.*]], i16 signext [[LAG:%.*]], i16 signext [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP17:%.*]] = icmp sgt i32 [[LASTINDEX]], 0
+; CHECK-NEXT:    br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[CONV5:%.*]] = sext i16 [[SCALE]] to i64
+; CHECK-NEXT:    [[SH_PROM:%.*]] = and i64 [[CONV5]], 4294967295
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[LAG]] to i64
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LASTINDEX]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ADD7_LCSSA:%.*]] = phi i64 [ [[ADD7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[CONV8:%.*]] = trunc i64 [[ADD7_LCSSA]] to i32
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ACCUMULATOR_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CONV8]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ACCUMULATOR_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[INPUTDATA]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[INPUTDATA]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
+; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP3]] to i64
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV]]
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i64 [[MUL]], [[SH_PROM]]
+; CHECK-NEXT:    [[ADD7]] = add i64 [[SHR]], [[ACCUMULATOR_018]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
 entry:
-; MSG: LV: Selecting VF: 1.
   %cmp17 = icmp sgt i32 %LastIndex, 0
   br i1 %cmp17, label %for.body.lr.ph, label %for.cond.cleanup