[llvm] b242e85 - [AArch64][NFC] Prepare test cases (for D128302) to show more accurate cost estimation of extract-element could generate better assembly code.

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 7 09:39:52 PDT 2022


Author: Mingming Liu
Date: 2022-07-07T09:39:29-07:00
New Revision: b242e8502cbc47357e9bc70bb38138a7cdc0f81f

URL: https://github.com/llvm/llvm-project/commit/b242e8502cbc47357e9bc70bb38138a7cdc0f81f
DIFF: https://github.com/llvm/llvm-project/commit/b242e8502cbc47357e9bc70bb38138a7cdc0f81f.diff

LOG: [AArch64][NFC] Prepare test cases (for D128302) to show more accurate cost estimation of extract-element could generate better assembly code.

Pre-commit the test cases (for D128302) to show that more accurate cost
estimation of extract-element could generate better code.

Differential Revision: https://reviews.llvm.org/D128945

Added: 
    llvm/test/Transforms/LICM/AArch64/extract-element.ll

Modified: 
    llvm/test/Analysis/CostModel/AArch64/kryo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/kryo.ll b/llvm/test/Analysis/CostModel/AArch64/kryo.ll
index 0be76f724b40f..ef4ade31c4654 100644
--- a/llvm/test/Analysis/CostModel/AArch64/kryo.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/kryo.ll
@@ -24,3 +24,27 @@ define void @vectorInstrCost() {
 
     ret void
 }
+
+; CHECK-LABEL: vectorInstrExtractCost
+define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) {
+    
+    ; Checks the extractelement costs reported for each lane of a
+    ; <4 x i64> vector. Extracting the element at index 0 is considered
+    ; free in the current implementation; index 2 is rounded to 0, so it
+    ; has cost 0 as well. Indices 1 and 3 are expected to have cost 2.
+    ;
+    ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1
+    ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2
+    %t1 = extractelement <4 x i64> %vecreg, i32 1
+    %t2 = extractelement <4 x i64> %vecreg, i32 2
+    %ele = add i64 %t2, 1
+    %cond = icmp eq i64 %t1, %ele
+
+    ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0
+    ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3
+    %t0 = extractelement <4 x i64> %vecreg, i32 0
+    %t3 = extractelement <4 x i64> %vecreg, i32 3
+    %val = select i1 %cond, i64 %t0 , i64 %t3
+
+    ret i64 %val
+}

diff  --git a/llvm/test/Transforms/LICM/AArch64/extract-element.ll b/llvm/test/Transforms/LICM/AArch64/extract-element.ll
new file mode 100644
index 0000000000000..b156b81d6708d
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/extract-element.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+
+define i1 @func(ptr %0, i64 %1) {
+; CHECK-LABEL: @func(
+; CHECK-NEXT:    br label [[TMP3:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP12:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[TMP1:%.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[DOTSPLIT_LOOP_EXIT2:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds <1 x i64>, ptr [[TMP0:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <1 x i64> [[TMP8]], i64 0
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[TMP9]], -1
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11]], label [[DOTSPLIT_LOOP_EXIT:%.*]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP12]] = add i64 [[TMP4]], 1
+; CHECK-NEXT:    br label [[TMP3]]
+; CHECK:       .split.loop.exit:
+; CHECK-NEXT:    [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ]
+; CHECK-NEXT:    [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ]
+; CHECK-NEXT:    [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0
+; CHECK-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], -1
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]]
+; CHECK-NEXT:    br label [[TMP17:%.*]]
+; CHECK:       .split.loop.exit2:
+; CHECK-NEXT:    [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ]
+; CHECK-NEXT:    [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ]
+; CHECK-NEXT:    br label [[TMP17]]
+; CHECK:       17:
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = xor i1 [[TMP18]], true
+; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]]
+; CHECK-NEXT:    ret i1 [[TMP21]]
+;
+  br label %3
+
+3:                                                ; preds = %14, %2
+  %4 = phi i64 [ 0, %2 ], [ %15, %14 ]            ; loop counter: 0 on entry, %15 (= %4 + 1) from block %14
+  %5 = icmp ult i64 %4, %1                        ; unsigned compare of the counter against %1
+  br i1 %5, label %6, label %16
+
+6:                                                ; preds = %3
+  %7 = getelementptr inbounds <1 x i64>, ptr %0, i64 %4
+  %8 = load <1 x i64>, ptr %7, align 8
+  %9 = extractelement <1 x i64> %8, i64 0         ; feeds the in-loop branch condition %10 and the chain below
+  %10 = icmp eq i64 %9, -1
+  %11 = xor i64 %9, -1                            ; %11 -> %12 -> %13 is consumed only by phi %18 in exit block %16;
+  %12 = add i64 %11, %4                           ; the expected output above places this chain in .split.loop.exit,
+  %13 = icmp uge i64 %12, %1                      ; after a second extractelement on the LCSSA phi of the loaded vector
+  br i1 %10, label %14, label %16
+
+14:                                               ; preds = %6
+  %15 = add i64 %4, 1
+  br label %3
+
+16:                                               ; preds = %3, %6
+  %17 = phi i1 [ %5, %3 ], [ %5, %6 ]
+  %18 = phi i1 [ %13, %6 ], [ undef, %3 ]         ; %13 is defined only on the edge from %6; undef on the edge from %3
+  %19 = xor i1 %17, true
+  %20 = select i1 %19, i1 true, i1 %18
+  ret i1 %20
+}


        


More information about the llvm-commits mailing list