[llvm] 0955125 - [AArch64] Add missing HasNEON predicates to int->float patterns
Alex Richardson via llvm-commits
llvm-commits at lists.llvm.org
Sat May 14 07:43:20 PDT 2022
Author: Alex Richardson
Date: 2022-05-14T14:15:36Z
New Revision: 09551251e32a0a00486c3de5e29b8df0aa4e5540
URL: https://github.com/llvm/llvm-project/commit/09551251e32a0a00486c3de5e29b8df0aa4e5540
DIFF: https://github.com/llvm/llvm-project/commit/09551251e32a0a00486c3de5e29b8df0aa4e5540.diff
LOG: [AArch64] Add missing HasNEON predicates to int->float patterns
I was trying to compile code with -march=+nosimd and hit various
instruction predicate verification errors. This patch should address the
ones I saw in integer to floating-point conversions.
I noticed that for signed conversions, some non-NEON instruction sequences
are shorter. I don't know if the longer NEON sequence is still faster on
current architectures (the patterns date back to the initial backend import).
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D125308
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f8ccc460ab662..0940457eace34 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5025,8 +5025,6 @@ def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
-}
-
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
@@ -5113,6 +5111,7 @@ def : Pat <(f64 (uint_to_fp (i32
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bits -> double are handled in target specific dag combine:
// performIntToFpCombine.
+} // let Predicates = [HasNEON]
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
@@ -6771,7 +6770,7 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
dsub)),
0),
ssub)))>,
- Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -6791,7 +6790,7 @@ class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
INST,
hsub),
0),
- ssub)))>, Requires<[NotForCodeSize]>;
+ ssub)))>, Requires<[NotForCodeSize, HasNEON]>;
def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6825,7 +6824,7 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
dsub)),
0),
dsub)))>,
- Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6844,7 +6843,7 @@ class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
INST,
ssub),
0),
- dsub)))>, Requires<[NotForCodeSize]>;
+ dsub)))>, Requires<[NotForCodeSize, HasNEON]>;
def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
index 18bcfce05e801..9fba4be133392 100644
--- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
+++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
@@ -4,14 +4,20 @@
; RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-ENABLED
; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-DISABLED
;; Emit an object file so that verifyPredicates is called (it is not used for ASM output).
-; RUNTODO: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 -o /dev/null %s --asm-show-inst -filetype=obj
+; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 -o /dev/null %s --asm-show-inst -filetype=obj
define double @ui8_to_double(i8* %i, float* %f) {
-; CHECK-LABEL: ui8_to_double:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ucvtf d0, d0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui8_to_double:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr b0, [x0]
+; NEON-ENABLED-NEXT: ucvtf d0, d0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui8_to_double:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrb w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf d0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i8, i8* %i, align 1
%conv = uitofp i8 %ld to double
@@ -19,11 +25,17 @@ entry:
}
define float @ui8_to_float(i8* %i, float* %f) {
-; CHECK-LABEL: ui8_to_float:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: ucvtf s0, s0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui8_to_float:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr b0, [x0]
+; NEON-ENABLED-NEXT: ucvtf s0, s0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui8_to_float:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrb w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf s0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i8, i8* %i, align 1
%conv = uitofp i8 %ld to float
@@ -43,11 +55,17 @@ entry:
}
define double @ui16_to_double(i16* %i, float* %f) {
-; CHECK-LABEL: ui16_to_double:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ucvtf d0, d0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui16_to_double:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: ucvtf d0, d0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui16_to_double:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrh w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf d0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, i16* %i, align 1
%conv = uitofp i16 %ld to double
@@ -55,11 +73,17 @@ entry:
}
define float @ui16_to_float(i16* %i, float* %f) {
-; CHECK-LABEL: ui16_to_float:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: ucvtf s0, s0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui16_to_float:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: ucvtf s0, s0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui16_to_float:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrh w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf s0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, i16* %i, align 1
%conv = uitofp i16 %ld to float
@@ -79,11 +103,17 @@ entry:
}
define double @ui32_to_double(i32* %i, float* %f) {
-; CHECK-LABEL: ui32_to_double:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ucvtf d0, d0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui32_to_double:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr s0, [x0]
+; NEON-ENABLED-NEXT: ucvtf d0, d0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui32_to_double:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldr w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf d0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i32, i32* %i, align 1
%conv = uitofp i32 %ld to double
@@ -212,12 +242,18 @@ entry:
}
define float @si16_to_float(i16* %i, float* %f) {
-; CHECK-LABEL: si16_to_float:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0]
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: scvtf s0, s0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: si16_to_float:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-ENABLED-NEXT: scvtf s0, s0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: si16_to_float:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrsh w8, [x0]
+; NEON-DISABLED-NEXT: scvtf s0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, i16* %i, align 1
%conv = sitofp i16 %ld to float
@@ -237,12 +273,18 @@ entry:
}
define double @si32_to_double(i32* %i, float* %f) {
-; CHECK-LABEL: si32_to_double:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: scvtf d0, d0
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: si32_to_double:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr s0, [x0]
+; NEON-ENABLED-NEXT: sshll v0.2d, v0.2s, #0
+; NEON-ENABLED-NEXT: scvtf d0, d0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: si32_to_double:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldr w8, [x0]
+; NEON-DISABLED-NEXT: scvtf d0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i32, i32* %i, align 1
%conv = sitofp i32 %ld to double
More information about the llvm-commits
mailing list