[llvm] 91bbb91 - [AArch64][GlobalISel] Regbankselect + select @llvm.aarch64.neon.uaddlv

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 19 10:48:14 PDT 2021


Author: Jessica Paquette
Date: 2021-04-19T10:47:49-07:00
New Revision: 91bbb914e01715b4b16d49c399b05310aa916cfe

URL: https://github.com/llvm/llvm-project/commit/91bbb914e01715b4b16d49c399b05310aa916cfe
DIFF: https://github.com/llvm/llvm-project/commit/91bbb914e01715b4b16d49c399b05310aa916cfe.diff

LOG: [AArch64][GlobalISel] Regbankselect + select @llvm.aarch64.neon.uaddlv

It turns out we already import a fair amount of selection code for intrinsics.
The imported code checks that the register banks on the G_INTRINSIC instruction
are correct. If they are, it goes ahead and selects the instruction.
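
For context, the bank check in that imported code looks roughly like the
sketch below. This is hand-written pseudocode for illustration only, not the
exact TableGen-erated matcher; MI, MRI, TRI, and RBI stand for the usual
instruction, register-info, and bank-info objects in scope during selection.

    // Sketch: an imported pattern only fires when the operands already live
    // on the register bank the pattern expects.
    Register DstReg = MI.getOperand(0).getReg();
    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      return false; // Wrong bank; this pattern does not apply.

Without FPR assignments from regbankselect, the imported uaddlv pattern can
never fire, which motivates the change below.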

This adds code to AArch64RegisterBankInfo to allow us to correctly determine
the register banks on intrinsics that have known register bank constraints.

For now, this only handles @llvm.aarch64.neon.uaddlv. This is necessary for
porting AArch64TargetLowering::LowerCTPOP.
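
Concretely, taking the first case from the new regbank-intrinsic.mir test
added below, regbankselect now maps both the destination and the vector
source of the intrinsic onto FPR:

    ; Before regbankselect:
    %copy:_(<16 x s8>) = COPY $q0
    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)

    ; After regbankselect:
    %copy:fpr(<16 x s8>) = COPY $q0
    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)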

Also add a utility for getting the intrinsic ID from a G_INTRINSIC instruction.
This is a little nicer than requiring callers to know how intrinsic
instructions are structured.
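
For reference, a G_INTRINSIC's operands are laid out as [explicit defs...,
intrinsic ID, uses...], so the utility simply reads the operand at index
getNumExplicitDefs(). The new isFPIntrinsic helper in the
AArch64RegisterBankInfo.cpp diff below then uses it like so:

    // The helper hides the operand layout of G_INTRINSIC; callers just ask
    // for the ID and compare against Intrinsic::* enumerators.
    switch (getIntrinsicID(MI)) {
    case Intrinsic::aarch64_neon_uaddlv:
      return true; // Known to only use and define FPRs.
    default:
      return false;
    }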

Differential Revision: https://reviews.llvm.org/D100398

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/Utils.h
    llvm/lib/CodeGen/GlobalISel/Utils.cpp
    llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 19a5589e7f5c7..1a592b6e576f4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -380,5 +380,10 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
 /// Returns true if the given block should be optimized for size.
 bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
                       BlockFrequencyInfo *BFI);
+
+/// \returns the intrinsic ID for a G_INTRINSIC or G_INTRINSIC_W_SIDE_EFFECTS
+/// instruction \p MI.
+unsigned getIntrinsicID(const MachineInstr &MI);
+
 } // End namespace llvm.
 #endif

diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 68f51c3702e4c..0b89ae4903b1e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -989,3 +989,12 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
   return F.hasOptSize() || F.hasMinSize() ||
          llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
 }
+
+unsigned llvm::getIntrinsicID(const MachineInstr &MI) {
+#ifndef NDEBUG
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_INTRINSIC ||
+         Opc == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+#endif
+  return MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
+}

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 7410c7677fbb1..8b488eaa578b2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -25,6 +26,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
@@ -466,11 +468,24 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
                                getValueMapping(RBIdx, Size), NumOperands);
 }
 
+/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
+static bool isFPIntrinsic(unsigned ID) {
+  // TODO: Add more intrinsics.
+  switch (ID) {
+  default:
+    return false;
+  case Intrinsic::aarch64_neon_uaddlv:
+    return true;
+  }
+}
+
 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                                const MachineRegisterInfo &MRI,
                                                const TargetRegisterInfo &TRI,
                                                unsigned Depth) const {
   unsigned Op = MI.getOpcode();
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(getIntrinsicID(MI)))
+    return true;
 
   // Do we have an explicit floating point instruction?
   if (isPreISelGenericFloatingPointOpcode(Op))
@@ -915,6 +930,20 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // Assign them FPR for now.
     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
     break;
+  case TargetOpcode::G_INTRINSIC: {
+    // Check if we know that the intrinsic has any constraints on its register
+    // banks. If it does, then update the mapping accordingly.
+    unsigned ID = getIntrinsicID(MI);
+    unsigned Idx = 0;
+    if (!isFPIntrinsic(ID))
+      break;
+    for (const auto &Op : MI.explicit_operands()) {
+      if (Op.isReg())
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+      ++Idx;
+    }
+    break;
+  }
   }
 
   // Finally construct the computed mapping.

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
new file mode 100644
index 0000000000000..7e89c9917b506
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify register banks for intrinsics with known constraints. (E.g. all
+# operands must be FPRs.)
+#
+
+...
+---
+name:            uaddlv_fpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_fpr
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr(<16 x s8>) = COPY $q0
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:_(<16 x s8>) = COPY $q0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_fpr_load
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: uaddlv_fpr_load
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %ptr:_(p0) = COPY $x0
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (load 8)
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_fpr_store
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: uaddlv_fpr_store
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %copy:gpr(<2 x s32>) = COPY $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s32>) = COPY %copy(<2 x s32>)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[COPY]](<2 x s32>)
+    ; CHECK: G_STORE %intrin(s32), %ptr(p0) :: (store 4)
+    %copy:_(<2 x s32>) = COPY $x0
+    %ptr:_(p0) = COPY $x0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<2 x s32>)
+    G_STORE %intrin, %ptr :: (store 4)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir
new file mode 100644
index 0000000000000..9a81493d973aa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            uaddlv_v8s8
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v8s8
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv8i8v:%[0-9]+]]:fpr16 = UADDLVv8i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v16s8
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v16s8
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv16i8v:%[0-9]+]]:fpr16 = UADDLVv16i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv16i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            uaddlv_v4s16
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v4s16
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv4i16v:%[0-9]+]]:fpr32 = UADDLVv4i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v8s16
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v8s16
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            uaddlv_v4s32
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v4s32
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i32v]], %subreg.dsub
+    ; CHECK: %intrin:fpr64 = COPY [[INSERT_SUBREG]].dsub
+    ; CHECK: $x0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:fpr(<4 x s32>) = COPY $q0
+    %intrin:fpr(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s32>)
+    $x0 = COPY %intrin(s64)
+    RET_ReallyLR implicit $x0

