[llvm] b242e85 - [AArch64][NFC] Prepare test cases (for D128302) to show more accurate cost estimation of extract-element could generate better assembly code.
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 7 09:39:52 PDT 2022
Author: Mingming Liu
Date: 2022-07-07T09:39:29-07:00
New Revision: b242e8502cbc47357e9bc70bb38138a7cdc0f81f
URL: https://github.com/llvm/llvm-project/commit/b242e8502cbc47357e9bc70bb38138a7cdc0f81f
DIFF: https://github.com/llvm/llvm-project/commit/b242e8502cbc47357e9bc70bb38138a7cdc0f81f.diff
LOG: [AArch64][NFC] Prepare test cases (for D128302) to show more accurate cost estimation of extract-element could generate better assembly code.
Pre-commit the test cases (for D128302) to show that more accurate cost
estimation of extract-element could generate better code.
Differential Revision: https://reviews.llvm.org/D128945
Added:
llvm/test/Transforms/LICM/AArch64/extract-element.ll
Modified:
llvm/test/Analysis/CostModel/AArch64/kryo.ll
Removed:
################################################################################
diff --git a/llvm/test/Analysis/CostModel/AArch64/kryo.ll b/llvm/test/Analysis/CostModel/AArch64/kryo.ll
index 0be76f724b40f..ef4ade31c4654 100644
--- a/llvm/test/Analysis/CostModel/AArch64/kryo.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/kryo.ll
@@ -24,3 +24,27 @@ define void @vectorInstrCost() {
ret void
}
+
+; CHECK-LABEL: vectorInstrExtractCost
+define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) {
+
+ ; Vector extracts - extracting the element at index 0 is considered free
+ ; in the current implementation. Index 2 is rounded to 0 (presumably
+ ; because <4 x i64> is split into two <2 x i64> halves - TODO confirm),
+ ; so it costs 0 as well, while the odd indices 1 and 3 each cost 2.
+ ;
+ ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1
+ ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2
+ %t1 = extractelement <4 x i64> %vecreg, i32 1
+ %t2 = extractelement <4 x i64> %vecreg, i32 2
+ %ele = add i64 %t2, 1
+ %cond = icmp eq i64 %t1, %ele
+
+ ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0
+ ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3
+ %t0 = extractelement <4 x i64> %vecreg, i32 0
+ %t3 = extractelement <4 x i64> %vecreg, i32 3
+ %val = select i1 %cond, i64 %t0 , i64 %t3
+
+ ret i64 %val
+}
diff --git a/llvm/test/Transforms/LICM/AArch64/extract-element.ll b/llvm/test/Transforms/LICM/AArch64/extract-element.ll
new file mode 100644
index 0000000000000..b156b81d6708d
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/extract-element.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+; @func walks <1 x i64> elements at %0 while each equals -1; the assertions below record -licm sinking the xor/add/icmp-uge chain (with a second extractelement) into .split.loop.exit.
+define i1 @func(ptr %0, i64 %1) {
+; CHECK-LABEL: @func(
+; CHECK-NEXT: br label [[TMP3:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP12:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[TMP1:%.*]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[DOTSPLIT_LOOP_EXIT2:%.*]]
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <1 x i64>, ptr [[TMP0:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <1 x i64> [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], -1
+; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11]], label [[DOTSPLIT_LOOP_EXIT:%.*]]
+; CHECK: 11:
+; CHECK-NEXT: [[TMP12]] = add i64 [[TMP4]], 1
+; CHECK-NEXT: br label [[TMP3]]
+; CHECK: .split.loop.exit:
+; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ]
+; CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ]
+; CHECK-NEXT: [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], -1
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]]
+; CHECK-NEXT: br label [[TMP17:%.*]]
+; CHECK: .split.loop.exit2:
+; CHECK-NEXT: [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ]
+; CHECK-NEXT: [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ]
+; CHECK-NEXT: br label [[TMP17]]
+; CHECK: 17:
+; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[TMP18]], true
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]]
+; CHECK-NEXT: ret i1 [[TMP21]]
+;
+ br label %3
+; Loop header: %4 counts up from 0; branch to the exit block %16 once %4 is no longer unsigned-below %1.
+3: ; preds = %14, %2
+ %4 = phi i64 [ 0, %2 ], [ %15, %14 ]
+ %5 = icmp ult i64 %4, %1
+ br i1 %5, label %6, label %16
+; Loop body: load the %4-th <1 x i64> from %0 and keep looping while its only lane is -1; %11-%13 feed only the exit phi %18.
+6: ; preds = %3
+ %7 = getelementptr inbounds <1 x i64>, ptr %0, i64 %4
+ %8 = load <1 x i64>, ptr %7, align 8
+ %9 = extractelement <1 x i64> %8, i64 0
+ %10 = icmp eq i64 %9, -1
+ %11 = xor i64 %9, -1
+ %12 = add i64 %11, %4
+ %13 = icmp uge i64 %12, %1
+ br i1 %10, label %14, label %16
+; Latch: advance %4 by one and jump back to the loop header.
+14: ; preds = %6
+ %15 = add i64 %4, 1
+ br label %3
+; Exit: the result is true when %5 was false (exit taken from the header); otherwise it is %13 from the exiting iteration.
+16: ; preds = %3, %6
+ %17 = phi i1 [ %5, %3 ], [ %5, %6 ]
+ %18 = phi i1 [ %13, %6 ], [ undef, %3 ]
+ %19 = xor i1 %17, true
+ %20 = select i1 %19, i1 true, i1 %18
+ ret i1 %20
+}
More information about the llvm-commits mailing list