[PATCH] D159356: [AArch64] Check FeatureAddrLSLFast to fold base address with scale
Allen zhong via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 1 06:20:15 PDT 2023
Allen created this revision.
Allen added reviewers: dmgreen, chill, efriedma, harviniriawan.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
Allen requested review of this revision.
Herald added subscribers: llvm-commits, wangpc.
Herald added a project: LLVM.
We split LSLFast into Addr and ALU parts in D157982 <https://reviews.llvm.org/D157982>; this patch
checks the related subtarget feature FeatureAddrLSLFast to
determine whether the constant is foldable into the shift of the addressing mode.
FeatureAddrLSLFast: shifts of 1, 2 and 3 (scales of 2, 4 and 8) are cheap.
https://reviews.llvm.org/D159356
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
llvm/test/Transforms/Inline/AArch64/ext.ll
llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
Index: llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
===================================================================
--- llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
+++ llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu < %s | FileCheck %s
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu -mattr=+addr-lsl-fast < %s | FileCheck %s
target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
Index: llvm/test/Transforms/Inline/AArch64/ext.ll
===================================================================
--- llvm/test/Transforms/Inline/AArch64/ext.ll
+++ llvm/test/Transforms/Inline/AArch64/ext.ll
@@ -11,7 +11,7 @@
; sext can be folded into gep.
; CHECK: Analyzing call of inner1
-; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructionsSimplified: 2
; CHECK: NumInstructions: 4
define i32 @inner1(ptr %ptr, i32 %i) {
%E = sext i32 %i to i64
Index: llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
+++ llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
@@ -1,6 +1,6 @@
-; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
-; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
-; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE
; CodeGenPrepare should move the zext into the block with the load
; so that SelectionDAG can select it with the load.
@@ -454,16 +454,16 @@
; The input has one free zext and one free sext. If we would have promoted
; all the way through the load we would end up with a free zext and a
; non-free sext (of %b).
-; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
+; OPTALL-LABEL: @promoteFreeSExtFromAddrMode128
; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, ptr %p
;
; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
;
-; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
-; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
-; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+; NONSTRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; NONSTRESS-NEXT: [[SEXT64:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXT64]]
;
; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
@@ -472,7 +472,7 @@
; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, ptr %addr, i64 [[IDX64]]
; OPTALL-NEXT: store i128 %stuff, ptr [[GEP]]
; OPTALL-NEXT: ret void
-define void @doNotPromoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) {
+define void @promoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) {
entry:
%t = load i8, ptr %p
%zextt = zext i8 %t to i32
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14191,8 +14191,9 @@
llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
3;
// Is the constant foldable in the shift of the addressing mode?
- // I.e., shift amount is between 1 and 4 inclusive.
- if (ShiftAmt == 0 || ShiftAmt > 4)
+ // I.e., shift amount is between 1 and 3 inclusive.
+ if (!Subtarget->hasFeature(AArch64::FeatureAddrLSLFast) ||
+ ShiftAmt == 0 || ShiftAmt > 3)
return false;
break;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D159356.555352.patch
Type: text/x-patch
Size: 4879 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230901/28a8049d/attachment.bin>
More information about the llvm-commits
mailing list