[PATCH] D159356: [AArch64] Check FeatureAddrLSLFast to fold base address with scale

Allen zhong via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 1 06:20:15 PDT 2023


Allen created this revision.
Allen added reviewers: dmgreen, chill, efriedma, harviniriawan.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
Allen requested review of this revision.
Herald added subscribers: llvm-commits, wangpc.
Herald added a project: LLVM.

We split LSLFast into Addr and ALU parts in D157982 <https://reviews.llvm.org/D157982>. This patch
checks the resulting subtarget feature, FeatureAddrLSLFast, when deciding
whether a constant scale can be folded into the shift of the addressing mode.

  FeatureAddrLSLFast: Shifts of 1, 2 and 3 (Scales of 2, 4 and 8) are cheap.


https://reviews.llvm.org/D159356

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
  llvm/test/Transforms/Inline/AArch64/ext.ll
  llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll


Index: llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu -mattr=+addr-lsl-fast < %s | FileCheck %s
 
 target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
 
Index: llvm/test/Transforms/Inline/AArch64/ext.ll
===================================================================
--- llvm/test/Transforms/Inline/AArch64/ext.ll
+++ llvm/test/Transforms/Inline/AArch64/ext.ll
@@ -11,7 +11,7 @@
 
 ; sext can be folded into gep.
 ; CHECK: Analyzing call of inner1
-; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructionsSimplified: 2
 ; CHECK: NumInstructions: 4
 define i32 @inner1(ptr %ptr, i32 %i) {
   %E = sext i32 %i to i64
Index: llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
+++ llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
@@ -1,6 +1,6 @@
-; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
-; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
-; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
 
 ; CodeGenPrepare should move the zext into the block with the load
 ; so that SelectionDAG can select it with the load.
@@ -454,16 +454,16 @@
 ; The input has one free zext and one free sext. If we would have promoted
 ; all the way through the load we would end up with a free zext and a
 ; non-free sext (of %b).
-; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
+; OPTALL-LABEL: @promoteFreeSExtFromAddrMode128
 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, ptr %p
 ;
 ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
 ;
-; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
-; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
-; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+; NONSTRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; NONSTRESS-NEXT: [[SEXT64:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXT64]]
 ;
 ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
@@ -472,7 +472,7 @@
 ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, ptr %addr, i64 [[IDX64]]
 ; OPTALL-NEXT: store i128 %stuff, ptr [[GEP]]
 ; OPTALL-NEXT: ret void
-define void @doNotPromoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) {
+define void @promoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) {
 entry:
   %t = load i8, ptr %p
   %zextt = zext i8 %t to i32
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14191,8 +14191,9 @@
           llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
           3;
       // Is the constant foldable in the shift of the addressing mode?
-      // I.e., shift amount is between 1 and 4 inclusive.
-      if (ShiftAmt == 0 || ShiftAmt > 4)
+      // I.e., shift amount is between 1 and 3 inclusive.
+      if (!Subtarget->hasFeature(AArch64::FeatureAddrLSLFast) ||
+          ShiftAmt == 0 || ShiftAmt > 3)
         return false;
       break;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D159356.555352.patch
Type: text/x-patch
Size: 4879 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230901/28a8049d/attachment.bin>


More information about the llvm-commits mailing list