[llvm] 5b7e27a - [ARM][CGP] Fix scalar condition selects for MVE

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 25 04:09:19 PDT 2020


Author: David Green
Date: 2020-08-25T12:09:06+01:00
New Revision: 5b7e27a4db95a07cc140e3980a49a1ee3fb2052c

URL: https://github.com/llvm/llvm-project/commit/5b7e27a4db95a07cc140e3980a49a1ee3fb2052c
DIFF: https://github.com/llvm/llvm-project/commit/5b7e27a4db95a07cc140e3980a49a1ee3fb2052c.diff

LOG: [ARM][CGP] Fix scalar condition selects for MVE

The ARM backend does not handle select/select_cc on vectors with scalar
conditions, preferring to expand them in CodeGenPrepare instead. This
usually works, except when optimizing for size, where the optsize check
would end up overruling the backend's isSelectSupported check.

We could handle the selects in ISel too, but expanding them to a branch
seems to give smaller code than trying to splat the condition to all lanes.

Differential Revision: https://reviews.llvm.org/D86433

Added: 
    llvm/test/CodeGen/Thumb2/mve-selectcc.ll

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index a85ac80ef365..86b5d2055f55 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6482,9 +6482,7 @@ bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
 /// If we have a SelectInst that will likely profit from branch prediction,
 /// turn it into a branch.
 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
-  // If branch conversion isn't desirable, exit early.
-  if (DisableSelectToBranch || OptSize ||
-      llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))
+  if (DisableSelectToBranch)
     return false;
 
   // Find all consecutive select instructions that share the same condition.
@@ -6520,7 +6518,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
     SelectKind = TargetLowering::ScalarValSelect;
 
   if (TLI->isSelectSupported(SelectKind) &&
-      !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+      (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
+       llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
     return false;
 
   // The DominatorTree needs to be rebuilt by any consumers after this

diff  --git a/llvm/test/CodeGen/Thumb2/mve-selectcc.ll b/llvm/test/CodeGen/Thumb2/mve-selectcc.ll
new file mode 100644
index 000000000000..8712417a4a72
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-selectcc.ll
@@ -0,0 +1,198 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
+
+define arm_aapcs_vfpcc <4 x i32> @test_v4i32(i32 %x, <4 x i32> %s0, <4 x i32> %s1) {
+; CHECK-LABEL: test_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <4 x i32> %s0, <4 x i32> %s1
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_v8i16(i32 %x, <8 x i16> %s0, <8 x i16> %s1) {
+; CHECK-LABEL: test_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <8 x i16> %s0, <8 x i16> %s1
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_v16i8(i32 %x, <16 x i8> %s0, <16 x i8> %s1) {
+; CHECK-LABEL: test_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <16 x i8> %s0, <16 x i8> %s1
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_v2i64(i32 %x, <2 x i64> %s0, <2 x i64> %s1) {
+; CHECK-LABEL: test_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <2 x i64> %s0, <2 x i64> %s1
+  ret <2 x i64> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_v4float(i32 %x, <4 x float> %s0, <4 x float> %s1) {
+; CHECK-LABEL: test_v4float:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <4 x float> %s0, <4 x float> %s1
+  ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @test_v8half(i32 %x, <8 x half> %s0, <8 x half> %s1) {
+; CHECK-LABEL: test_v8half:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <8 x half> %s0, <8 x half> %s1
+  ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <2 x double> @test_v2double(i32 %x, <2 x double> %s0, <2 x double> %s1) {
+; CHECK-LABEL: test_v2double:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    bxeq lr
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <2 x double> %s0, <2 x double> %s1
+  ret <2 x double> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @minsize_v4i32(i32 %x, <4 x i32> %s0, <4 x i32> %s1) minsize {
+; CHECK-LABEL: minsize_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB7_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB7_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <4 x i32> %s0, <4 x i32> %s1
+  ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @minsize_v8i16(i32 %x, <8 x i16> %s0, <8 x i16> %s1) minsize {
+; CHECK-LABEL: minsize_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB8_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB8_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <8 x i16> %s0, <8 x i16> %s1
+  ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @minsize_v16i8(i32 %x, <16 x i8> %s0, <16 x i8> %s1) minsize {
+; CHECK-LABEL: minsize_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB9_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB9_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <16 x i8> %s0, <16 x i8> %s1
+  ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <2 x i64> @minsize_v2i64(i32 %x, <2 x i64> %s0, <2 x i64> %s1) minsize {
+; CHECK-LABEL: minsize_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB10_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB10_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <2 x i64> %s0, <2 x i64> %s1
+  ret <2 x i64> %s
+}
+
+define arm_aapcs_vfpcc <4 x float> @minsize_v4float(i32 %x, <4 x float> %s0, <4 x float> %s1) minsize {
+; CHECK-LABEL: minsize_v4float:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB11_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB11_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <4 x float> %s0, <4 x float> %s1
+  ret <4 x float> %s
+}
+
+define arm_aapcs_vfpcc <8 x half> @minsize_v8half(i32 %x, <8 x half> %s0, <8 x half> %s1) minsize {
+; CHECK-LABEL: minsize_v8half:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB12_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB12_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <8 x half> %s0, <8 x half> %s1
+  ret <8 x half> %s
+}
+
+define arm_aapcs_vfpcc <2 x double> @minsize_v2double(i32 %x, <2 x double> %s0, <2 x double> %s1) minsize {
+; CHECK-LABEL: minsize_v2double:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    cbz r0, .LBB13_2
+; CHECK-NEXT:  @ %bb.1: @ %select.false
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:  .LBB13_2: @ %select.end
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq i32 %x, 0
+  %s = select i1 %c,  <2 x double> %s0, <2 x double> %s1
+  ret <2 x double> %s
+}


        


More information about the llvm-commits mailing list