[llvm] [GlobalIsel][AArch64] MVP for vectorized selects. (PR #76104)

Wed Dec 20 13:48:48 PST 2023

https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/76104

Try to select G_SELECT instructions whose condition is a vector.

Inspired by arm64-vselect.ll and extensions to it.

Note that not all legal types are covered by tests yet.

>From 360acc9a0b540012e559dc94bb2177ef86f48d1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 20 Dec 2023 22:22:23 +0100
Subject: [PATCH 1/2] [GlobalIsel][AArch64] MVP for vectorized selects.

Try to select G_SELECT instructions whose condition is a vector.

Inspired by arm64-vselect.ll and extensions to it.

Note that not all legal types are covered by tests yet.
---
 .../GISel/AArch64InstructionSelector.cpp      | 34 +++++++++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  8 ++-
 .../AArch64/GlobalISel/select-select.mir      | 71 +++++++++++++------
 3 files changed, 91 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a4ace6cce46342..6fbd850395fda6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -507,6 +507,9 @@ class AArch64InstructionSelector : public InstructionSelector {
   /// zero extended.
   bool isDef32(const MachineInstr &MI) const;
 
+  /// Select selects where the condition is a vector.
+  bool selectVectorSelect(GSelect &Sel, MachineIRBuilder &MIB) const;
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -1154,6 +1157,34 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
   return GenericOpc;
 }
 
+bool AArch64InstructionSelector::selectVectorSelect(
+    GSelect &Sel, MachineIRBuilder &MIB) const {
+  MachineRegisterInfo *MRI = MIB.getMRI();
+  Register Dst = Sel.getReg(0);
+  Register Cond = Sel.getCondReg();
+  Register True = Sel.getTrueReg();
+  Register False = Sel.getFalseReg();
+  LLT DestTy = MRI->getType(Dst);
+  LLT CondTy = MRI->getType(Cond);
+
+  // There are no scalable vectors yet.
+  if (CondTy.isScalable())
+    return false;
+
+  // We would need to sext the Cond to the Dest, i.e., sshll.
+  // It will fail for v2s64, v4s32, and v8s16.
+  if (CondTy != DestTy)
+    return false;
+
+  unsigned Opcode =
+      CondTy.getSizeInBits() == 128 ? AArch64::BSLv16i8 : AArch64::BSLv8i8;
+  auto SelMI = MIB.buildInstr(Opcode, {Dst}, {Cond, True, False});
+  constrainSelectedInstRegOperands(*SelMI, TII, TRI, RBI);
+
+  Sel.eraseFromParent();
+  return true;
+}
+
 MachineInstr *
 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                        Register False, AArch64CC::CondCode CC,
@@ -3442,6 +3473,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     const Register TReg = Sel.getTrueReg();
     const Register FReg = Sel.getFalseReg();
 
+    if (MRI.getType(CondReg).isVector())
+      return selectVectorSelect(Sel, MIB);
+
     if (tryOptSelect(Sel))
       return true;
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 8b909f53c84460..0af18108c6710f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -708,7 +708,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
 
   getActionDefinitionsBuilder(G_SELECT)
-      .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
+      .legalFor({{s32, s32},
+                 {s64, s32},
+                 {p0, s32},
+                 {v2s32, v2s32},
+                 {v4s16, v4s16},
+                 {v8s8, v8s8},
+                 {v16s8, v16s8}})
       .widenScalarToNextPow2(0)
       .clampScalar(0, s32, s64)
       .clampScalar(1, s32, s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index e26c1431350979..8d125a81df49a5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -95,7 +95,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1
-    ; G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
 
     ; CHECK-LABEL: name: csinc_t_0_f_1
     ; CHECK: liveins: $w0, $w1
@@ -123,7 +122,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1
-    ; G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
 
     ; CHECK-LABEL: name: csinv_t_0_f_neg_1
     ; CHECK: liveins: $w0, $w1
@@ -151,7 +149,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
 
     ; CHECK-LABEL: name: csinc_t_1
     ; CHECK: liveins: $w0, $w1, $w2
@@ -180,7 +177,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
 
     ; CHECK-LABEL: name: csinv_t_neg_1
     ; CHECK: liveins: $w0, $w1, $w2
@@ -209,7 +205,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, t, 1 -> CSINC t, zreg, cc
 
     ; CHECK-LABEL: name: csinc_f_1
     ; CHECK: liveins: $w0, $w1, $w2
@@ -238,7 +233,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, t, -1 -> CSINC t, zreg, cc
 
     ; CHECK-LABEL: name: csinc_f_neg_1
     ; CHECK: liveins: $w0, $w1, $w2
@@ -267,7 +261,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1
-    ; G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
 
     ; CHECK-LABEL: name: csinc_t_1_no_cmp
     ; CHECK: liveins: $w0, $w1
@@ -294,7 +287,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1
-    ; G_SELECT cc, t, 1 -> CSINC t, zreg, cc
 
     ; CHECK-LABEL: name: csinc_f_1_no_cmp
     ; CHECK: liveins: $w0, $w1
@@ -321,7 +313,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $x0, $x1
-    ; G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
 
     ; CHECK-LABEL: name: csinc_t_1_no_cmp_s64
     ; CHECK: liveins: $x0, $x1
@@ -350,7 +341,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
 
     ; CHECK-LABEL: name: csneg_s32
     ; CHECK: liveins: $w0, $w1, $w2
@@ -380,7 +370,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
 
     ; CHECK-LABEL: name: csneg_inverted_cc
     ; CHECK: liveins: $w0, $w1, $w2
@@ -410,7 +399,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
 
     ; CHECK-LABEL: name: csneg_s64
     ; CHECK: liveins: $x0, $x1, $x2
@@ -441,8 +429,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; We should prefer eliminating the G_SUB over eliminating the constant true
-    ; value.
 
     ; CHECK-LABEL: name: csneg_with_true_cst
     ; CHECK: liveins: $w0, $w1, $w2
@@ -472,7 +458,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
 
     ; CHECK-LABEL: name: csinv_s32
     ; CHECK: liveins: $w0, $w1, $w2
@@ -502,7 +487,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
 
     ; CHECK-LABEL: name: csinv_inverted_cc
     ; CHECK: liveins: $w0, $w1, $w2
@@ -532,7 +516,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
 
     ; CHECK-LABEL: name: csinv_s64
     ; CHECK: liveins: $x0, $x1, $x2
@@ -564,7 +547,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; zext(s32 -1) != s64 -1, so we can't fold it away.
 
     ; CHECK-LABEL: name: xor_not_negative_one
     ; CHECK: liveins: $x0, $x1, $x2
@@ -601,7 +583,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, %true, (G_ADD %x, 1) -> CSINC %true, %x, cc
     ; CHECK-LABEL: name: csinc_s32
     ; CHECK: liveins: $w0, $w1, $w2
     ; CHECK-NEXT: {{  $}}
@@ -630,7 +611,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $w0, $w1, $w2
-    ; G_SELECT cc, (G_ADD %x, 1), %false -> CSINC %x, %false, inv_cc
     ; CHECK-LABEL: name: csinc_s32_inverted_cc
     ; CHECK: liveins: $w0, $w1, $w2
     ; CHECK-NEXT: {{  $}}
@@ -659,7 +639,6 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $x0, $x1, $x2
-    ; G_SELECT cc, %true, (G_PTR_ADD %x, 1) -> CSINC %true, %x, cc
 
     ; CHECK-LABEL: name: csinc_ptr_add
     ; CHECK: liveins: $x0, $x1, $x2
@@ -713,3 +692,53 @@ body:             |
     %select:gpr(s32) = G_SELECT %reg0, %xor, %sub
     $w0 = COPY %select(s32)
     RET_ReallyLR implicit $w0
+...
+---
+name:            select_vectorized_conditon_v2s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; CHECK-LABEL: name: select_vectorized_conditon_v2s32
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %reg0:gpr32all = COPY $w0
+    ; CHECK-NEXT: %reg1:gpr32 = COPY $w1
+    ; CHECK-NEXT: %reg2:gpr32all = COPY $w2
+    ; CHECK-NEXT: %reg3:gpr32 = COPY $w0
+    ; CHECK-NEXT: %reg4:gpr32all = COPY $w1
+    ; CHECK-NEXT: %reg5:gpr32 = COPY $w2
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %reg0, %subreg.ssub
+    ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %reg1
+    ; CHECK-NEXT: %true:fpr64 = COPY [[INSvi32gpr]].dsub
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], %reg2, %subreg.ssub
+    ; CHECK-NEXT: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %reg3
+    ; CHECK-NEXT: %false:fpr64 = COPY [[INSvi32gpr1]].dsub
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], %reg4, %subreg.ssub
+    ; CHECK-NEXT: [[INSvi32gpr2:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG2]], 1, %reg5
+    ; CHECK-NEXT: %cond:fpr64 = COPY [[INSvi32gpr2]].dsub
+    ; CHECK-NEXT: %select:fpr64 = BSLv8i8 %cond, %true, %false
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], %select, %subreg.dsub
+    ; CHECK-NEXT: %extract:fpr32 = DUPi32 [[INSERT_SUBREG3]], 1
+    ; CHECK-NEXT: $w0 = COPY %extract
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %idx:gpr(s64) = G_CONSTANT i64 1
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %reg2:gpr(s32) = COPY $w2
+    %reg3:gpr(s32) = COPY $w0
+    %reg4:gpr(s32) = COPY $w1
+    %reg5:gpr(s32) = COPY $w2
+    %true:fpr(<2 x s32>) = G_BUILD_VECTOR %reg0(s32), %reg1(s32)
+    %false:fpr(<2 x s32>) = G_BUILD_VECTOR %reg2(s32), %reg3(s32)
+    %cond:fpr(<2 x s32>) = G_BUILD_VECTOR %reg4(s32), %reg5(s32)
+    %select:fpr(<2 x s32>) = G_SELECT %cond, %true, %false
+    %extract:fpr(s32) = G_EXTRACT_VECTOR_ELT %select:fpr(<2 x s32>), %idx:gpr(s64)
+    $w0 = COPY %extract(s32)
+    RET_ReallyLR implicit $w0

>From b6a7d7418ad75eb52d7ef1c614821364d6398067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 20 Dec 2023 22:45:34 +0100
Subject: [PATCH 2/2] preserve comments

---
 .../AArch64/GlobalISel/select-select.mir      | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index 8d125a81df49a5..9602dd7ef0a7fd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -88,6 +88,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
 name:            csinc_t_0_f_1
 legalized:       true
 regBankSelected: true
@@ -115,6 +116,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
 name:            csinv_t_0_f_neg_1
 legalized:       true
 regBankSelected: true
@@ -142,6 +144,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
 name:            csinc_t_1
 legalized:       true
 regBankSelected: true
@@ -198,6 +201,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc, t, 1 -> CSINC t, zreg, cc
 name:            csinc_f_1
 legalized:       true
 regBankSelected: true
@@ -226,6 +230,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc, t, -1 -> CSINC t, zreg, cc
 name:            csinc_f_neg_1
 legalized:       true
 regBankSelected: true
@@ -254,6 +259,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
 name:            csinc_t_1_no_cmp
 legalized:       true
 regBankSelected: true
@@ -280,6 +286,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, t, 1 -> CSINC t, zreg, cc
 name:            csinc_f_1_no_cmp
 legalized:       true
 regBankSelected: true
@@ -306,6 +313,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
 name:            csinc_t_1_no_cmp_s64
 legalized:       true
 regBankSelected: true
@@ -334,6 +342,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
 name:            csneg_s32
 legalized:       true
 regBankSelected: true
@@ -363,6 +372,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
 name:            csneg_inverted_cc
 legalized:       true
 regBankSelected: true
@@ -392,6 +402,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
 name:            csneg_s64
 legalized:       true
 regBankSelected: true
@@ -422,6 +433,8 @@ body:             |
     RET_ReallyLR implicit $x0
 ...
 ---
+# We should prefer eliminating the G_SUB over eliminating the constant true
+#  value.
 name:            csneg_with_true_cst
 legalized:       true
 regBankSelected: true
@@ -451,6 +464,7 @@ body:             |
     RET_ReallyLR implicit $w0
 ...
 ---
+# G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
 name:            csinv_s32
 legalized:       true
 regBankSelected: true
@@ -480,6 +494,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
 name:            csinv_inverted_cc
 legalized:       true
 regBankSelected: true
@@ -509,6 +524,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
 name:            csinv_s64
 legalized:       true
 regBankSelected: true
@@ -540,6 +556,7 @@ body:             |
 
 ...
 ---
+# zext(s32 -1) != s64 -1, so we can't fold it away.
 name:            xor_not_negative_one
 legalized:       true
 regBankSelected: true
@@ -576,6 +593,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, %true, (G_ADD %x, 1) -> CSINC %true, %x, cc
 name:            csinc_s32
 legalized:       true
 regBankSelected: true
@@ -604,6 +622,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, (G_ADD %x, 1), %false -> CSINC %x, %false, inv_cc
 name:            csinc_s32_inverted_cc
 legalized:       true
 regBankSelected: true
@@ -632,6 +651,7 @@ body:             |
 
 ...
 ---
+# G_SELECT cc, %true, (G_PTR_ADD %x, 1) -> CSINC %true, %x, cc
 name:            csinc_ptr_add
 legalized:       true
 regBankSelected: true