[llvm] 0955125 - [AArch64] Add missing HasNEON predicates to int->float patterns

Alex Richardson via llvm-commits llvm-commits at lists.llvm.org
Sat May 14 07:43:20 PDT 2022


Author: Alex Richardson
Date: 2022-05-14T14:15:36Z
New Revision: 09551251e32a0a00486c3de5e29b8df0aa4e5540

URL: https://github.com/llvm/llvm-project/commit/09551251e32a0a00486c3de5e29b8df0aa4e5540
DIFF: https://github.com/llvm/llvm-project/commit/09551251e32a0a00486c3de5e29b8df0aa4e5540.diff

LOG: [AArch64] Add missing HasNEON predicates to int->float patterns

I was trying to compile code with -march=+nosimd and hit various
instruction predicate verification errors. This patch should address the
ones I saw in integer to floating-point conversions.

I noticed that for signed conversions, some non-NEON instruction sequences
are shorter. I don't know if the longer NEON sequences are still faster on
current architectures (the patterns date back to the initial backend import).

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D125308

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f8ccc460ab662..0940457eace34 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5025,8 +5025,6 @@ def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
 def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
           (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
 }
-}
-
 // If an integer is about to be converted to a floating point value,
 // just load it on the floating point unit.
 // Here are the patterns for 8 and 16-bits to float.
@@ -5113,6 +5111,7 @@ def : Pat <(f64 (uint_to_fp (i32
                           (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
 // 64-bits -> double are handled in target specific dag combine:
 // performIntToFpCombine.
+} // let Predicates = [HasNEON]
 
 //===----------------------------------------------------------------------===//
 // Advanced SIMD three different-sized vector instructions.
@@ -6771,7 +6770,7 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
                                   dsub)),
                                0),
                              ssub)))>,
-    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
 
 def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                           (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -6791,7 +6790,7 @@ class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
                                   INST,
                                   hsub),
                                 0),
-                            ssub)))>, Requires<[NotForCodeSize]>;
+                            ssub)))>, Requires<[NotForCodeSize, HasNEON]>;
 
 def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                            (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6825,7 +6824,7 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
                                    dsub)),
                                0),
                              dsub)))>,
-    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
+    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
 
 def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                            (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
@@ -6844,7 +6843,7 @@ class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
                                   INST,
                                   ssub),
                                0),
-                             dsub)))>, Requires<[NotForCodeSize]>;
+                             dsub)))>, Requires<[NotForCodeSize, HasNEON]>;
 
 def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                            (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;

diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
index 18bcfce05e801..9fba4be133392 100644
--- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
+++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
@@ -4,14 +4,20 @@
 ; RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-ENABLED
 ; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-DISABLED
 ;; Emit an object file so that verifyPredicates is called (it is not used for ASM output).
-; RUNTODO: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 -o /dev/null %s --asm-show-inst -filetype=obj
+; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 -o /dev/null %s --asm-show-inst -filetype=obj
 
 define double @ui8_to_double(i8* %i, float* %f) {
-; CHECK-LABEL: ui8_to_double:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr b0, [x0]
-; CHECK-NEXT:    ucvtf d0, d0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: ui8_to_double:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr b0, [x0]
+; NEON-ENABLED-NEXT:    ucvtf d0, d0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: ui8_to_double:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldrb w8, [x0]
+; NEON-DISABLED-NEXT:    ucvtf d0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i8, i8* %i, align 1
   %conv = uitofp i8 %ld to double
@@ -19,11 +25,17 @@ entry:
 }
 
 define float @ui8_to_float(i8* %i, float* %f) {
-; CHECK-LABEL: ui8_to_float:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr b0, [x0]
-; CHECK-NEXT:    ucvtf s0, s0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: ui8_to_float:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr b0, [x0]
+; NEON-ENABLED-NEXT:    ucvtf s0, s0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: ui8_to_float:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldrb w8, [x0]
+; NEON-DISABLED-NEXT:    ucvtf s0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i8, i8* %i, align 1
   %conv = uitofp i8 %ld to float
@@ -43,11 +55,17 @@ entry:
 }
 
 define double @ui16_to_double(i16* %i, float* %f) {
-; CHECK-LABEL: ui16_to_double:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    ucvtf d0, d0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: ui16_to_double:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr h0, [x0]
+; NEON-ENABLED-NEXT:    ucvtf d0, d0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: ui16_to_double:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldrh w8, [x0]
+; NEON-DISABLED-NEXT:    ucvtf d0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i16, i16* %i, align 1
   %conv = uitofp i16 %ld to double
@@ -55,11 +73,17 @@ entry:
 }
 
 define float @ui16_to_float(i16* %i, float* %f) {
-; CHECK-LABEL: ui16_to_float:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    ucvtf s0, s0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: ui16_to_float:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr h0, [x0]
+; NEON-ENABLED-NEXT:    ucvtf s0, s0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: ui16_to_float:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldrh w8, [x0]
+; NEON-DISABLED-NEXT:    ucvtf s0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i16, i16* %i, align 1
   %conv = uitofp i16 %ld to float
@@ -79,11 +103,17 @@ entry:
 }
 
 define double @ui32_to_double(i32* %i, float* %f) {
-; CHECK-LABEL: ui32_to_double:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    ucvtf d0, d0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: ui32_to_double:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr s0, [x0]
+; NEON-ENABLED-NEXT:    ucvtf d0, d0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: ui32_to_double:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldr w8, [x0]
+; NEON-DISABLED-NEXT:    ucvtf d0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i32, i32* %i, align 1
   %conv = uitofp i32 %ld to double
@@ -212,12 +242,18 @@ entry:
 }
 
 define float @si16_to_float(i16* %i, float* %f) {
-; CHECK-LABEL: si16_to_float:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr h0, [x0]
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    scvtf s0, s0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: si16_to_float:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr h0, [x0]
+; NEON-ENABLED-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-ENABLED-NEXT:    scvtf s0, s0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: si16_to_float:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldrsh w8, [x0]
+; NEON-DISABLED-NEXT:    scvtf s0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i16, i16* %i, align 1
   %conv = sitofp i16 %ld to float
@@ -237,12 +273,18 @@ entry:
 }
 
 define double @si32_to_double(i32* %i, float* %f) {
-; CHECK-LABEL: si32_to_double:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-NEXT:    scvtf d0, d0
-; CHECK-NEXT:    ret
+; NEON-ENABLED-LABEL: si32_to_double:
+; NEON-ENABLED:       // %bb.0: // %entry
+; NEON-ENABLED-NEXT:    ldr s0, [x0]
+; NEON-ENABLED-NEXT:    sshll v0.2d, v0.2s, #0
+; NEON-ENABLED-NEXT:    scvtf d0, d0
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: si32_to_double:
+; NEON-DISABLED:       // %bb.0: // %entry
+; NEON-DISABLED-NEXT:    ldr w8, [x0]
+; NEON-DISABLED-NEXT:    scvtf d0, w8
+; NEON-DISABLED-NEXT:    ret
 entry:
   %ld = load i32, i32* %i, align 1
   %conv = sitofp i32 %ld to double


        


More information about the llvm-commits mailing list