[llvm] 5d29d75 - [AArch64] Predicate SSHLL;SCVTF patterns behind UseAlternateSExtLoadCVTF32
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon May 16 10:00:35 PDT 2022
Author: David Green
Date: 2022-05-16T18:00:30+01:00
New Revision: 5d29d752735e71b73a54bfc9ab747384be9e4246
URL: https://github.com/llvm/llvm-project/commit/5d29d752735e71b73a54bfc9ab747384be9e4246
DIFF: https://github.com/llvm/llvm-project/commit/5d29d752735e71b73a54bfc9ab747384be9e4246.diff
LOG: [AArch64] Predicate SSHLL;SCVTF patterns behind UseAlternateSExtLoadCVTF32
There have been some patterns in the AArch64 backend to optimize code of
the form:
ldrsh w8, [x0]
scvtf s0, w8
to:
ldr h0, [x0]
sshll v0.4s, v0.4h, #0
scvtf s0, s0
The idea is to remove the GPR->FPR move, but in reality it is making code
larger and slower (or the same) on all the CPUs I tried.
This patch adds the UseAlternateSExtLoadCVTF32 predicate, similar to a
nearby related pattern.
Differential Revision: https://reviews.llvm.org/D125470
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-scvt.ll
llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0940457eace3..5292dda9240b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6790,7 +6790,8 @@ class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
INST,
hsub),
0),
- ssub)))>, Requires<[NotForCodeSize, HasNEON]>;
+ ssub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6843,7 +6844,8 @@ class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
INST,
ssub),
0),
- dsub)))>, Requires<[NotForCodeSize, HasNEON]>;
+ dsub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-scvt.ll b/llvm/test/CodeGen/AArch64/arm64-scvt.ll
index 069f33daaad7..85b94c4c49ab 100644
--- a/llvm/test/CodeGen/AArch64/arm64-scvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-scvt.ll
@@ -486,13 +486,20 @@ entry:
}
define float @sfct2(i16* nocapture %sp0) {
-; CHECK-LABEL: sfct2:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0, #2]
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: scvtf s0, s0
-; CHECK-NEXT: fmul s0, s0, s0
-; CHECK-NEXT: ret
+; CHECK-CYC-LABEL: sfct2:
+; CHECK-CYC: // %bb.0: // %entry
+; CHECK-CYC-NEXT: ldr h0, [x0, #2]
+; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-CYC-NEXT: scvtf s0, s0
+; CHECK-CYC-NEXT: fmul s0, s0, s0
+; CHECK-CYC-NEXT: ret
+;
+; CHECK-A57-LABEL: sfct2:
+; CHECK-A57: // %bb.0: // %entry
+; CHECK-A57-NEXT: ldrsh w8, [x0, #2]
+; CHECK-A57-NEXT: scvtf s0, w8
+; CHECK-A57-NEXT: fmul s0, s0, s0
+; CHECK-A57-NEXT: ret
entry:
%addr = getelementptr i16, i16* %sp0, i64 1
%pix_sp0.0.copyload = load i16, i16* %addr, align 1
@@ -558,13 +565,20 @@ entry:
}
define float @sfct6(i16* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct6:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: scvtf s0, s0
-; CHECK-NEXT: fmul s0, s0, s0
-; CHECK-NEXT: ret
+; CHECK-CYC-LABEL: sfct6:
+; CHECK-CYC: // %bb.0: // %entry
+; CHECK-CYC-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-CYC-NEXT: scvtf s0, s0
+; CHECK-CYC-NEXT: fmul s0, s0, s0
+; CHECK-CYC-NEXT: ret
+;
+; CHECK-A57-LABEL: sfct6:
+; CHECK-A57: // %bb.0: // %entry
+; CHECK-A57-NEXT: ldrsh w8, [x0, x1, lsl #1]
+; CHECK-A57-NEXT: scvtf s0, w8
+; CHECK-A57-NEXT: fmul s0, s0, s0
+; CHECK-A57-NEXT: ret
entry:
%addr = getelementptr i16, i16* %sp0, i64 %offset
%pix_sp0.0.copyload = load i16, i16* %addr, align 1
@@ -645,13 +659,20 @@ entry:
}
define double @sfct11(i32* nocapture %sp0) {
-; CHECK-LABEL: sfct11:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x0, #4]
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: scvtf d0, d0
-; CHECK-NEXT: fmul d0, d0, d0
-; CHECK-NEXT: ret
+; CHECK-CYC-LABEL: sfct11:
+; CHECK-CYC: // %bb.0: // %entry
+; CHECK-CYC-NEXT: ldr s0, [x0, #4]
+; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-CYC-NEXT: scvtf d0, d0
+; CHECK-CYC-NEXT: fmul d0, d0, d0
+; CHECK-CYC-NEXT: ret
+;
+; CHECK-A57-LABEL: sfct11:
+; CHECK-A57: // %bb.0: // %entry
+; CHECK-A57-NEXT: ldr w8, [x0, #4]
+; CHECK-A57-NEXT: scvtf d0, w8
+; CHECK-A57-NEXT: fmul d0, d0, d0
+; CHECK-A57-NEXT: ret
entry:
%addr = getelementptr i32, i32* %sp0, i64 1
%pix_sp0.0.copyload = load i32, i32* %addr, align 1
@@ -716,13 +737,20 @@ entry:
}
define double @sfct15(i32* nocapture %sp0, i64 %offset) {
-; CHECK-LABEL: sfct15:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: scvtf d0, d0
-; CHECK-NEXT: fmul d0, d0, d0
-; CHECK-NEXT: ret
+; CHECK-CYC-LABEL: sfct15:
+; CHECK-CYC: // %bb.0: // %entry
+; CHECK-CYC-NEXT: ldr s0, [x0, x1, lsl #2]
+; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-CYC-NEXT: scvtf d0, d0
+; CHECK-CYC-NEXT: fmul d0, d0, d0
+; CHECK-CYC-NEXT: ret
+;
+; CHECK-A57-LABEL: sfct15:
+; CHECK-A57: // %bb.0: // %entry
+; CHECK-A57-NEXT: ldr w8, [x0, x1, lsl #2]
+; CHECK-A57-NEXT: scvtf d0, w8
+; CHECK-A57-NEXT: fmul d0, d0, d0
+; CHECK-A57-NEXT: ret
entry:
%addr = getelementptr i32, i32* %sp0, i64 %offset
%pix_sp0.0.copyload = load i32, i32* %addr, align 1
@@ -774,13 +802,20 @@ entry:
}
define float @sfct18(i16* nocapture %sp0) {
-; CHECK-LABEL: sfct18:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldur h0, [x0, #1]
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: scvtf s0, s0
-; CHECK-NEXT: fmul s0, s0, s0
-; CHECK-NEXT: ret
+; CHECK-CYC-LABEL: sfct18:
+; CHECK-CYC: // %bb.0:
+; CHECK-CYC-NEXT: ldur h0, [x0, #1]
+; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-CYC-NEXT: scvtf s0, s0
+; CHECK-CYC-NEXT: fmul s0, s0, s0
+; CHECK-CYC-NEXT: ret
+;
+; CHECK-A57-LABEL: sfct18:
+; CHECK-A57: // %bb.0:
+; CHECK-A57-NEXT: ldursh w8, [x0, #1]
+; CHECK-A57-NEXT: scvtf s0, w8
+; CHECK-A57-NEXT: fmul s0, s0, s0
+; CHECK-A57-NEXT: ret
%bitcast = ptrtoint i16* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i16*
@@ -868,13 +903,20 @@ define double @sfct22(i16* nocapture %sp0) {
}
define double @sfct23(i32* nocapture %sp0) {
-; CHECK-LABEL: sfct23:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldur s0, [x0, #1]
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: scvtf d0, d0
-; CHECK-NEXT: fmul d0, d0, d0
-; CHECK-NEXT: ret
+; CHECK-CYC-LABEL: sfct23:
+; CHECK-CYC: // %bb.0:
+; CHECK-CYC-NEXT: ldur s0, [x0, #1]
+; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-CYC-NEXT: scvtf d0, d0
+; CHECK-CYC-NEXT: fmul d0, d0, d0
+; CHECK-CYC-NEXT: ret
+;
+; CHECK-A57-LABEL: sfct23:
+; CHECK-A57: // %bb.0:
+; CHECK-A57-NEXT: ldur w8, [x0, #1]
+; CHECK-A57-NEXT: scvtf d0, w8
+; CHECK-A57-NEXT: fmul d0, d0, d0
+; CHECK-A57-NEXT: ret
%bitcast = ptrtoint i32* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i32*
diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
index 9fba4be13339..82e625af477b 100644
--- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
+++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;; These test functions previously triggered the following error when emitting machine code:
;; LLVM ERROR: Attempting to emit UCVTFv1i64 instruction but the Feature_HasNEON predicate(s) are not met
-; RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-ENABLED
-; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-DISABLED
+; RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16,+alternate-sextload-cvt-f32-pattern < %s | FileCheck %s --check-prefixes=CHECK,NEON-ENABLED
+; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16,+alternate-sextload-cvt-f32-pattern < %s | FileCheck %s --check-prefixes=CHECK,NEON-DISABLED
;; Emit an object file so that verifyPredicates is called (it is not used for ASM output).
; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 -o /dev/null %s --asm-show-inst -filetype=obj
@@ -206,11 +206,19 @@ entry:
}
define float @si8_to_float(i8* %i, float* %f) {
-; CHECK-LABEL: si8_to_float:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrsb w8, [x0]
-; CHECK-NEXT: scvtf s0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: si8_to_float:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr b0, [x0]
+; NEON-ENABLED-NEXT: sshll v0.8h, v0.8b, #0
+; NEON-ENABLED-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-ENABLED-NEXT: scvtf s0, s0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: si8_to_float:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrsb w8, [x0]
+; NEON-DISABLED-NEXT: scvtf s0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i8, i8* %i, align 1
%conv = sitofp i8 %ld to float
@@ -230,11 +238,19 @@ entry:
}
define double @si16_to_double(i16* %i, float* %f) {
-; CHECK-LABEL: si16_to_double:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf d0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: si16_to_double:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-ENABLED-NEXT: sshll v0.2d, v0.2s, #0
+; NEON-ENABLED-NEXT: scvtf d0, d0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: si16_to_double:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrsh w8, [x0]
+; NEON-DISABLED-NEXT: scvtf d0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, i16* %i, align 1
%conv = sitofp i16 %ld to double
More information about the llvm-commits
mailing list