[llvm] c663b25 - [AArch64][GISel] Add FP16 fcmp lowering
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 17 09:22:52 PDT 2023
Author: David Green
Date: 2023-04-17T17:22:46+01:00
New Revision: c663b2576fc214ff7d46b5713a8afabc4e2965ef
URL: https://github.com/llvm/llvm-project/commit/c663b2576fc214ff7d46b5713a8afabc4e2965ef
DIFF: https://github.com/llvm/llvm-project/commit/c663b2576fc214ff7d46b5713a8afabc4e2965ef.diff
LOG: [AArch64][GISel] Add FP16 fcmp lowering
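This adds legalization and post-legalizer lowering for v4f16 and v8f16 vector
compares. The getActionDefinitionsBuilder rules for G_FCMP are split from those
for G_ICMP, as they are quite different operations. Without full FP16 support,
fp16 vector compares are scalarized and the scalar compares are widened to s32.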
Differential Revision: https://reviews.llvm.org/D147947
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
llvm/test/CodeGen/AArch64/vacg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f4aaa5886d6dd..e43c1ef521779 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -418,7 +418,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.clampScalar(0, MinFPScalar, s128);
- getActionDefinitionsBuilder({G_ICMP, G_FCMP})
+ getActionDefinitionsBuilder(G_ICMP)
.legalFor({{s32, s32},
{s32, s64},
{s32, p0},
@@ -449,6 +449,43 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
s64)
.clampNumElements(0, v2s32, v4s32);
+ getActionDefinitionsBuilder(G_FCMP)
+ // If we don't have full FP16 support, then scalarize the elements of
+ // vectors containing fp16 types.
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
+ // If we don't have full FP16 support, then widen s16 to s32 if we
+ // encounter it.
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0] == s16 && !HasFP16;
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
+ .legalFor({{s16, s16},
+ {s32, s32},
+ {s32, s64},
+ {v4s32, v4s32},
+ {v2s32, v2s32},
+ {v2s64, v2s64},
+ {v4s16, v4s16},
+ {v8s16, v8s16}})
+ .widenScalarOrEltToNextPow2(1)
+ .clampScalar(1, s32, s64)
+ .clampScalar(0, s32, s32)
+ .minScalarEltSameAsIf(
+ [=](const LegalityQuery &Query) {
+ const LLT &Ty = Query.Types[0];
+ const LLT &SrcTy = Query.Types[1];
+ return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
+ Ty.getElementType() != SrcTy.getElementType();
+ },
+ 0, 1)
+ .clampNumElements(0, v2s32, v4s32);
+
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
unsigned DstSize = Query.Types[0].getSizeInBits();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 7894c05218ebd..ce2189db8c61c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -961,9 +961,10 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
const auto Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
Register LHS = MI.getOperand(2).getReg();
- // TODO: Handle v4s16 case.
unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
- if (EltSize != 32 && EltSize != 64)
+ if (EltSize == 16 && !ST.hasFullFP16())
+ return false;
+ if (EltSize != 16 && EltSize != 32 && EltSize != 64)
return false;
Register RHS = MI.getOperand(3).getReg();
auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
index 8f6dbb6fd58ed..355cf193272e6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir
@@ -23,3 +23,99 @@ body: |
$w0 = COPY %5(s32)
...
+---
+name: legalize_v8s16
+alignment: 4
+legalized: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: legalize_v8s16
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:_(<8 x s16>) = COPY $q0
+ ; CHECK-NEXT: %rhs:_(<8 x s16>) = COPY $q1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %lhs(<8 x s16>)
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %rhs(<8 x s16>)
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV8]](s16)
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP]](s32)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+ ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV9]](s16)
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]]
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP1]](s32)
+ ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+ ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV10]](s16)
+ ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT4]](s32), [[FPEXT5]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP2]](s32)
+ ; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
+ ; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV11]](s16)
+ ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT6]](s32), [[FPEXT7]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP3]](s32)
+ ; CHECK-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
+ ; CHECK-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[UV12]](s16)
+ ; CHECK-NEXT: [[FCMP4:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT8]](s32), [[FPEXT9]]
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP4]](s32)
+ ; CHECK-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
+ ; CHECK-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[UV13]](s16)
+ ; CHECK-NEXT: [[FCMP5:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT10]](s32), [[FPEXT11]]
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP5]](s32)
+ ; CHECK-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
+ ; CHECK-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[UV14]](s16)
+ ; CHECK-NEXT: [[FCMP6:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT12]](s32), [[FPEXT13]]
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP6]](s32)
+ ; CHECK-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
+ ; CHECK-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[UV15]](s16)
+ ; CHECK-NEXT: [[FCMP7:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT14]](s32), [[FPEXT15]]
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP7]](s32)
+ ; CHECK-NEXT: %fcmp:_(<8 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; CHECK-NEXT: $q0 = COPY %fcmp(<8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
+ %lhs:_(<8 x s16>) = COPY $q0
+ %rhs:_(<8 x s16>) = COPY $q1
+ %fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
+ $q0 = COPY %fcmp(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: legalize_v4s16
+alignment: 4
+legalized: true
+body: |
+ bb.0:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: legalize_v4s16
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: %rhs:_(<4 x s16>) = COPY $d1
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %lhs(<4 x s16>)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %rhs(<4 x s16>)
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT]](s32), [[FPEXT1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP]](s32)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+ ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT2]](s32), [[FPEXT3]]
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP1]](s32)
+ ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+ ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
+ ; CHECK-NEXT: [[FCMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT4]](s32), [[FPEXT5]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP2]](s32)
+ ; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
+ ; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
+ ; CHECK-NEXT: [[FCMP3:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[FPEXT6]](s32), [[FPEXT7]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[FCMP3]](s32)
+ ; CHECK-NEXT: %fcmp:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+ ; CHECK-NEXT: $d0 = COPY %fcmp(<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %lhs:_(<4 x s16>) = COPY $d0
+ %rhs:_(<4 x s16>) = COPY $d1
+ %fcmp:_(<4 x s16>) = G_FCMP floatpred(oeq), %lhs(<4 x s16>), %rhs
+ $d0 = COPY %fcmp(<4 x s16>)
+ RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index e13bee2e5725f..e206ec9d03ea4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -324,7 +324,6 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FCMP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
index 17109e16947d3..8a0edbc99e352 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/lower-neon-vector-fcmp.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
...
---
name: oeq
@@ -702,20 +702,20 @@ body: |
...
---
-name: dont_lower_s16
+name: lower_v8s16
alignment: 4
legalized: true
body: |
bb.0:
liveins: $q0, $q1
- ; CHECK-LABEL: name: dont_lower_s16
+ ; CHECK-LABEL: name: lower_v8s16
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:_(<8 x s16>) = COPY $q0
; CHECK-NEXT: %rhs:_(<8 x s16>) = COPY $q1
- ; CHECK-NEXT: %fcmp:_(<8 x s16>) = G_FCMP floatpred(oeq), %lhs(<8 x s16>), %rhs
- ; CHECK-NEXT: $q0 = COPY %fcmp(<8 x s16>)
+ ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<8 x s16>) = G_FCMEQ %lhs, %rhs(<8 x s16>)
+ ; CHECK-NEXT: $q0 = COPY [[FCMEQ]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:_(<8 x s16>) = COPY $q0
%rhs:_(<8 x s16>) = COPY $q1
@@ -723,6 +723,29 @@ body: |
$q0 = COPY %fcmp(<8 x s16>)
RET_ReallyLR implicit $q0
+...
+---
+name: lower_v4s16
+alignment: 4
+legalized: true
+body: |
+ bb.0:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: lower_v4s16
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: %rhs:_(<4 x s16>) = COPY $d1
+ ; CHECK-NEXT: [[FCMEQ:%[0-9]+]]:_(<4 x s16>) = G_FCMEQ %lhs, %rhs(<4 x s16>)
+ ; CHECK-NEXT: $d0 = COPY [[FCMEQ]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %lhs:_(<4 x s16>) = COPY $d0
+ %rhs:_(<4 x s16>) = COPY $d1
+ %fcmp:_(<4 x s16>) = G_FCMP floatpred(oeq), %lhs(<4 x s16>), %rhs
+ $d0 = COPY %fcmp(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
...
---
name: is_not_nan
diff --git a/llvm/test/CodeGen/AArch64/vacg.ll b/llvm/test/CodeGen/AArch64/vacg.ll
index 402d260806675..c3556e09b0c94 100644
--- a/llvm/test/CodeGen/AArch64/vacg.ll
+++ b/llvm/test/CodeGen/AArch64/vacg.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel | FileCheck %s
define <4 x i32> @gt_v4f32(<4 x float> %a, <4 x float> %b) {
More information about the llvm-commits mailing list