[llvm] e478385 - [ARM] Fix instruction selection for ARMISD::CMOV with f16 type
Victor Campos via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 29 02:40:42 PST 2019
Author: Victor Campos
Date: 2019-11-29T10:40:37Z
New Revision: e478385e7708d0bcef43559651e6d62e387a507a
URL: https://github.com/llvm/llvm-project/commit/e478385e7708d0bcef43559651e6d62e387a507a
DIFF: https://github.com/llvm/llvm-project/commit/e478385e7708d0bcef43559651e6d62e387a507a.diff
LOG: [ARM] Fix instruction selection for ARMISD::CMOV with f16 type
Summary:
In the cases where the CMOV (f16) SDNode is used with condition codes
LT, LE, VC or NE, it is successfully selected into a VSEL instruction.
In the remaining cases, however, instruction selection fails since VSEL
does not support other condition codes.
This patch handles such cases by using the single-precision version of
the VMOV instruction.
Reviewers: ostannard, dmgreen
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70667
Added:
llvm/test/CodeGen/ARM/cmov_fp16.ll
Modified:
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMInstrVFP.td
Removed: (none)
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 563fdda56104..de4377ec5a47 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1213,9 +1213,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBBI = NewMI;
return true;
}
+ case ARM::VMOVHcc:
case ARM::VMOVScc:
case ARM::VMOVDcc: {
- unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
.add(MI.getOperand(2))
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index fdd961bfbb2f..90be9a0333ed 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2279,6 +2279,12 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
[(set (f32 SPR:$Sd),
(ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
+
+def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
+ IIC_fpUNA16,
+ [(set (f16 HPR:$Sd),
+ (ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>,
+ RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>;
} // hasSideEffects
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll
new file mode 100644
index 000000000000..925fed582811
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmov_fp16.ll
@@ -0,0 +1,261 @@
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-THUMB,CHECK
+; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-ARM,CHECK
+
+define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-LABEL: test_ne:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vseleq.f16 s0, s0, s2
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp ne i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-LABEL: test_eq:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vseleq.f16 s0, s0, s2
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp eq i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-LABEL: test_gt:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp sgt i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-LABEL: test_ge:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp sge i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-LABEL: test_lt:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp slt i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-LABEL: test_le:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vmov.f16 r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp sle i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-THUMB-LABEL: test_hi:
+; CHECK-THUMB: @ %bb.0: @ %entry
+; CHECK-THUMB-NEXT: vmov s2, r0
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s0, r1
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: it hi
+; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
+; CHECK-THUMB-NEXT: bx lr
+;
+; CHECK-ARM-LABEL: test_hi:
+; CHECK-ARM: @ %bb.0: @ %entry
+; CHECK-ARM-NEXT: vmov s2, r0
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s0, r1
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vmovhi.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
+; CHECK-ARM-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp ugt i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-THUMB-LABEL: test_hs:
+; CHECK-THUMB: @ %bb.0: @ %entry
+; CHECK-THUMB-NEXT: vmov s2, r0
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s0, r1
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: it hs
+; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
+; CHECK-THUMB-NEXT: bx lr
+;
+; CHECK-ARM-LABEL: test_hs:
+; CHECK-ARM: @ %bb.0: @ %entry
+; CHECK-ARM-NEXT: vmov s2, r0
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s0, r1
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vmovhs.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
+; CHECK-ARM-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp uge i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-THUMB-LABEL: test_lo:
+; CHECK-THUMB: @ %bb.0: @ %entry
+; CHECK-THUMB-NEXT: vmov s2, r0
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s0, r1
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: it lo
+; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
+; CHECK-THUMB-NEXT: bx lr
+;
+; CHECK-ARM-LABEL: test_lo:
+; CHECK-ARM: @ %bb.0: @ %entry
+; CHECK-ARM-NEXT: vmov s2, r0
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s0, r1
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vmovlo.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
+; CHECK-ARM-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp ult i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
+define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) {
+; CHECK-THUMB-LABEL: test_ls:
+; CHECK-THUMB: @ %bb.0: @ %entry
+; CHECK-THUMB-NEXT: vmov s2, r0
+; CHECK-THUMB-NEXT: cmp r2, r3
+; CHECK-THUMB-NEXT: vmov s0, r1
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-THUMB-NEXT: it ls
+; CHECK-THUMB-NEXT: vmovls.f32 s0, s2
+; CHECK-THUMB-NEXT: vmov.f16 r0, s0
+; CHECK-THUMB-NEXT: bx lr
+;
+; CHECK-ARM-LABEL: test_ls:
+; CHECK-ARM: @ %bb.0: @ %entry
+; CHECK-ARM-NEXT: vmov s2, r0
+; CHECK-ARM-NEXT: cmp r2, r3
+; CHECK-ARM-NEXT: vmov s0, r1
+; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2
+; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0
+; CHECK-ARM-NEXT: vmovls.f32 s0, s2
+; CHECK-ARM-NEXT: vmov.f16 r0, s0
+; CHECK-ARM-NEXT: bx lr
+entry:
+ %x.half = uitofp i32 %x to half
+ %y.half = uitofp i32 %y to half
+ %cmp = icmp ule i32 %a, %b
+ %cond = select i1 %cmp, half %x.half, half %y.half
+ %0 = bitcast half %cond to i16
+ %1 = zext i16 %0 to i32
+ ret i32 %1
+}
+
More information about the llvm-commits mailing list