[llvm] 93b20e7 - [AArch64] Extend int-to-fp load optimization to support f16 (#168076)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 23 04:32:07 PST 2025
Author: Guy David
Date: 2025-11-23T14:32:03+02:00
New Revision: 93b20e7d1f1d72c19c450a81ef5d84376e474b77
URL: https://github.com/llvm/llvm-project/commit/93b20e7d1f1d72c19c450a81ef5d84376e474b77
DIFF: https://github.com/llvm/llvm-project/commit/93b20e7d1f1d72c19c450a81ef5d84376e474b77.diff
LOG: [AArch64] Extend int-to-fp load optimization to support f16 (#168076)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
llvm/test/CodeGen/AArch64/itofp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ac3745ea5c274..0bbe117ecf51f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20146,7 +20146,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
return Res;
EVT VT = N->getValueType(0);
- if (VT != MVT::f32 && VT != MVT::f64)
+ if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+ if (VT == MVT::f16 && !Subtarget->hasFullFP16())
return SDValue();
// Only optimize when the source and destination types have the same width.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 50a3a4ab8d8b6..881f7707f0eb7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7014,6 +7014,19 @@ multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
sub))>;
}
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+defm : UIntToFPROLoadPat<f16, i32, zextloadi8,
+ UCVTFv1i16, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f16 (uint_to_fp (i32
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f16 (uint_to_fp (i32
+ (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)),
+ (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+}
+
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
index 478ccf58f32c5..56e4b1988b8d1 100644
--- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
+++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
@@ -43,11 +43,17 @@ entry:
}
define half @ui8_to_half(ptr %i, ptr %f) {
-; CHECK-LABEL: ui8_to_half:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui8_to_half:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr b0, [x0]
+; NEON-ENABLED-NEXT: ucvtf h0, h0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui8_to_half:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrb w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf h0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i8, ptr %i, align 1
%conv = uitofp i8 %ld to half
@@ -91,11 +97,17 @@ entry:
}
define half @ui16_to_half(ptr %i, ptr %f) {
-; CHECK-LABEL: ui16_to_half:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui16_to_half:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: ucvtf h0, h0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui16_to_half:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrh w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf h0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, ptr %i, align 1
%conv = uitofp i16 %ld to half
@@ -277,11 +289,17 @@ entry:
}
define half @si16_to_half(ptr %i, ptr %f) {
-; CHECK-LABEL: si16_to_half:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf h0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: si16_to_half:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: scvtf h0, h0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: si16_to_half:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrsh w8, [x0]
+; NEON-DISABLED-NEXT: scvtf h0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, ptr %i, align 1
%conv = sitofp i16 %ld to half
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index e526a9f7bc0f6..26a933d400f29 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -1090,11 +1090,17 @@ define half @stofp_load_i16_f16(ptr %p) {
; CHECK-NOFP16-NEXT: fcvt h0, s0
; CHECK-NOFP16-NEXT: ret
;
-; CHECK-FP16-LABEL: stofp_load_i16_f16:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: ldrsh w8, [x0]
-; CHECK-FP16-NEXT: scvtf h0, w8
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: stofp_load_i16_f16:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: ldr h0, [x0]
+; CHECK-FP16-SD-NEXT: scvtf h0, h0
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: stofp_load_i16_f16:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: ldrsh w8, [x0]
+; CHECK-FP16-GI-NEXT: scvtf h0, w8
+; CHECK-FP16-GI-NEXT: ret
entry:
%a = load i16, ptr %p
%c = sitofp i16 %a to half
@@ -1109,11 +1115,17 @@ define half @utofp_load_i16_f16(ptr %p) {
; CHECK-NOFP16-NEXT: fcvt h0, s0
; CHECK-NOFP16-NEXT: ret
;
-; CHECK-FP16-LABEL: utofp_load_i16_f16:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: ldrh w8, [x0]
-; CHECK-FP16-NEXT: ucvtf h0, w8
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: utofp_load_i16_f16:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: ldr h0, [x0]
+; CHECK-FP16-SD-NEXT: ucvtf h0, h0
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: utofp_load_i16_f16:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: ldrh w8, [x0]
+; CHECK-FP16-GI-NEXT: ucvtf h0, w8
+; CHECK-FP16-GI-NEXT: ret
entry:
%a = load i16, ptr %p
%c = uitofp i16 %a to half
@@ -1149,8 +1161,8 @@ define half @utofp_load_i8_f16(ptr %p) {
;
; CHECK-FP16-LABEL: utofp_load_i8_f16:
; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: ldrb w8, [x0]
-; CHECK-FP16-NEXT: ucvtf h0, w8
+; CHECK-FP16-NEXT: ldr b0, [x0]
+; CHECK-FP16-NEXT: ucvtf h0, h0
; CHECK-FP16-NEXT: ret
entry:
%a = load i8, ptr %p
More information about the llvm-commits mailing list