[llvm] a64b2e9 - [NFC][SCEV][LoopUnroll] Add tests where treating `or` as `add` raises expansion cost

Mon Dec 12 09:42:08 PST 2022

Author: Roman Lebedev
Date: 2022-12-12T20:41:56+03:00
New Revision: a64b2e9e3e5422acbde549f3e6f458a6be25db76

URL: https://github.com/llvm/llvm-project/commit/a64b2e9e3e5422acbde549f3e6f458a6be25db76
DIFF: https://github.com/llvm/llvm-project/commit/a64b2e9e3e5422acbde549f3e6f458a6be25db76.diff

LOG: [NFC][SCEV][LoopUnroll] Add tests where treating `or` as `add` raises expansion cost

From https://reviews.llvm.org/rG46db90cc71d1#1154128

Added: 
    llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll

Modified: 
    llvm/test/Analysis/ScalarEvolution/add-like-or.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/ScalarEvolution/add-like-or.ll b/llvm/test/Analysis/ScalarEvolution/add-like-or.ll
index a1cfec808437..e144755d8fba 100644

--- a/llvm/test/Analysis/ScalarEvolution/add-like-or.ll
+++ b/llvm/test/Analysis/ScalarEvolution/add-like-or.ll
@@ -14,3 +14,44 @@ define i8 @or-of-constant-with-no-common-bits-set(i8 %x, i8 %y) {
   %r = or i8 %t0, 3
   ret i8 %r
 }
+
+define void @mask-high(i64 %arg, ptr dereferenceable(4) %arg1) {
+; CHECK-LABEL: 'mask-high'
+; CHECK-NEXT:  Classifying expressions for: @mask-high
+; CHECK-NEXT:    %i = load i32, ptr %arg1, align 4
+; CHECK-NEXT:    --> %i U: full-set S: full-set
+; CHECK-NEXT:    %i2 = sext i32 %i to i64
+; CHECK-NEXT:    --> (sext i32 %i to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
+; CHECK-NEXT:    %i3 = and i64 %arg, -16
+; CHECK-NEXT:    --> (16 * (%arg /u 16))<nuw> U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    %i4 = or i64 1, %i3
+; CHECK-NEXT:    --> (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw> U: [1,-14) S: [-9223372036854775807,9223372036854775794)
+; CHECK-NEXT:    %i7 = phi i64 [ %i4, %bb ], [ %i8, %bb6 ]
+; CHECK-NEXT:    --> {(1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>,+,1}<%bb6> U: full-set S: full-set Exits: ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>) LoopDispositions: { %bb6: Computable }
+; CHECK-NEXT:    %i8 = add i64 %i7, 1
+; CHECK-NEXT:    --> {(2 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>,+,1}<%bb6> U: full-set S: full-set Exits: (1 + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))<nsw> LoopDispositions: { %bb6: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @mask-high
+; CHECK-NEXT:  Loop %bb6: backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))
+; CHECK-NEXT:  Loop %bb6: constant max backedge-taken count is -9223372034707292162
+; CHECK-NEXT:  Loop %bb6: symbolic max backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))
+; CHECK-NEXT:  Loop %bb6: Predicated backedge-taken count is (-1 + (-16 * (%arg /u 16)) + ((sext i32 %i to i64) smax (1 + (16 * (%arg /u 16))<nuw>)<nuw><nsw>))
+; CHECK-NEXT:   Predicates:
+; CHECK:       Loop %bb6: Trip multiple is 1
+;
+bb:                                               ; entry: compute loop start (%i4) and bound (%i2), then guard the loop
+  %i = load i32, ptr %arg1, align 4
+  %i2 = sext i32 %i to i64                        ; loop bound: the loaded i32 widened to i64
+  %i3 = and i64 %arg, -16                         ; -16 clears the low four bits of %arg
+  %i4 = or i64 1, %i3                             ; bit 0 of %i3 is zero, so this 'or' equals 'add 1'; the expected SCEV above is (1 + 16 * (%arg /u 16))
+  %i5 = icmp sgt i64 %i4, %i2                     ; signed: is the start value already past the bound?
+  br i1 %i5, label %bb10, label %bb6              ; if so, bypass the loop entirely
+
+bb6:                                              ; preds = %bb6, %bb
+  %i7 = phi i64 [ %i4, %bb ], [ %i8, %bb6 ]       ; induction variable, starts at %i4
+  %i8 = add i64 %i7, 1                            ; step of 1 per iteration
+  %i9 = icmp slt i64 %i7, %i2                     ; signed compare uses the pre-increment value %i7
+  br i1 %i9, label %bb6, label %bb10              ; keep looping while %i7 < %i2
+
+bb10:                                             ; preds = %bb6, %bb
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll
new file mode 100644
index 000000000000..158aa8774205
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes='loop-unroll' -unroll-runtime -S < %s 2>&1 | FileCheck %s
+
+define void @mask-high(i64 %arg, ptr dereferenceable(4) %arg1) {
+; CHECK-LABEL: define {{[^@]+}}@mask-high(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT:    [[I2:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = and i64 [[ARG:%.*]], -16
+; CHECK-NEXT:    [[I4:%.*]] = or i64 1, [[I3]]
+; CHECK-NEXT:    [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]]
+; CHECK-NEXT:    br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]]
+; CHECK:       bb6.preheader:
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ]
+; CHECK-NEXT:    [[I8]] = add i64 [[I7]], 1
+; CHECK-NEXT:    [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]]
+; CHECK-NEXT:    br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]]
+; CHECK:       bb10.loopexit:
+; CHECK-NEXT:    br label [[BB10]]
+; CHECK:       bb10:
+; CHECK-NEXT:    ret void
+;
+bb:                                               ; entry: the expected output above keeps one copy of the bb6 body and only adds preheader/loopexit blocks
+  %i = load i32, ptr %arg1, align 4
+  %i2 = sext i32 %i to i64                        ; loop bound: the loaded i32 widened to i64
+  %i3 = and i64 %arg, -16                         ; -16 clears the low four bits of %arg
+  %i4 = or i64 1, %i3                             ; loop start value; bit 0 of %i3 is zero, so the 'or' only sets bit 0
+  %i5 = icmp sgt i64 %i4, %i2                     ; signed: is the start value already past the bound?
+  br i1 %i5, label %bb10, label %bb6              ; if so, bypass the loop entirely
+
+bb6:                                              ; preds = %bb6, %bb
+  %i7 = phi i64 [ %i4, %bb ], [ %i8, %bb6 ]       ; induction variable, starts at %i4
+  %i8 = add i64 %i7, 1                            ; step of 1 per iteration
+  %i9 = icmp slt i64 %i7, %i2                     ; signed compare uses the pre-increment value %i7
+  br i1 %i9, label %bb6, label %bb10              ; keep looping while %i7 < %i2
+
+bb10:                                             ; preds = %bb6, %bb
+  ret void
+}
+
+
+define void @mask-low(i64 %arg, ptr dereferenceable(4) %arg1) {
+; CHECK-LABEL: define {{[^@]+}}@mask-low(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT:    [[I2:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = and i64 [[ARG:%.*]], 16
+; CHECK-NEXT:    [[I4:%.*]] = add i64 1, [[I3]]
+; CHECK-NEXT:    [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]]
+; CHECK-NEXT:    br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]]
+; CHECK:       bb6.preheader:
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ]
+; CHECK-NEXT:    [[I8]] = add i64 [[I7]], 1
+; CHECK-NEXT:    [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]]
+; CHECK-NEXT:    br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]]
+; CHECK:       bb10.loopexit:
+; CHECK-NEXT:    br label [[BB10]]
+; CHECK:       bb10:
+; CHECK-NEXT:    ret void
+;
+bb:                                               ; entry: the expected output above keeps one copy of the bb6 body and only adds preheader/loopexit blocks
+  %i = load i32, ptr %arg1, align 4
+  %i2 = sext i32 %i to i64                        ; loop bound: the loaded i32 widened to i64
+  %i3 = and i64 %arg, 16                          ; keeps only bit 4 of %arg, so %i3 is 0 or 16
+  %i4 = add i64 1, %i3                            ; loop start value (1 or 17); written as a plain 'add', where @mask-high uses 'or'
+  %i5 = icmp sgt i64 %i4, %i2                     ; signed: is the start value already past the bound?
+  br i1 %i5, label %bb10, label %bb6              ; if so, bypass the loop entirely
+
+bb6:                                              ; preds = %bb6, %bb
+  %i7 = phi i64 [ %i4, %bb ], [ %i8, %bb6 ]       ; induction variable, starts at %i4
+  %i8 = add i64 %i7, 1                            ; step of 1 per iteration
+  %i9 = icmp slt i64 %i7, %i2                     ; signed compare uses the pre-increment value %i7
+  br i1 %i9, label %bb6, label %bb10              ; keep looping while %i7 < %i2
+
+bb10:                                             ; preds = %bb6, %bb
+  ret void
+}