[llvm] 8071c2f - [AArch64][GlobalISel] Make <8 x s8> shifts legal.

Amara Emerson via llvm-commits <llvm-commits@lists.llvm.org>
Thu Oct 1 14:10:17 PDT 2020


Author: Amara Emerson
Date: 2020-10-01T14:10:10-07:00
New Revision: 8071c2f5c6149d0dc976819002dc46d9e7edfa40

URL: https://github.com/llvm/llvm-project/commit/8071c2f5c6149d0dc976819002dc46d9e7edfa40
DIFF: https://github.com/llvm/llvm-project/commit/8071c2f5c6149d0dc976819002dc46d9e7edfa40.diff

LOG: [AArch64][GlobalISel] Make <8 x s8> shifts legal.
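
With this change the AArch64 legalizer accepts G_SHL, G_ASHR and G_LSHR over
<8 x s8> directly, and the selector maps them onto the v8i8 instructions
(SHLv8i8_shift/USHLv8i8 for left shifts; SSHLv8i8/USHLv8i8 with a NEGv8i8 of
the shift amount for the right shifts). A minimal sketch of the now-legal MIR
pattern, mirroring the tests added below:

    %0:_(<8 x s8>) = COPY $d0
    %1:_(<8 x s8>) = COPY $d1
    %2:_(<8 x s8>) = G_SHL %0, %1
    $d0 = COPY %2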

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1daa2b29b9d5..22e21b4bf082 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -22,8 +22,8 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -34,8 +34,8 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -101,8 +101,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
 
-  bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
-                                  int64_t CmpConstant,
+  bool tryOptAndIntoCompareBranch(MachineInstr *LHS, int64_t CmpConstant,
                                   const CmpInst::Predicate &Pred,
                                   MachineBasicBlock *DstMBB,
                                   MachineIRBuilder &MIB) const;
@@ -313,10 +312,11 @@ class AArch64InstructionSelector : public InstructionSelector {
   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
   /// or not a shift + extend should be folded into an addressing mode. Returns
   /// None when this is not profitable or possible.
-  ComplexRendererFns
-  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
-                    MachineOperand &Offset, unsigned SizeInBytes,
-                    bool WantsExt) const;
+  ComplexRendererFns selectExtendedSHL(MachineOperand &Root,
+                                       MachineOperand &Base,
+                                       MachineOperand &Offset,
+                                       unsigned SizeInBytes,
+                                       bool WantsExt) const;
   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                        unsigned SizeInBytes) const;
@@ -360,7 +360,7 @@ class AArch64InstructionSelector : public InstructionSelector {
   /// subregister copy if necessary. Return either ExtReg, or the result of the
   /// new copy.
   Register narrowExtendRegIfNeeded(Register ExtReg,
-                                             MachineIRBuilder &MIB) const;
+                                   MachineIRBuilder &MIB) const;
   Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
                                    MachineIRBuilder &MIB) const;
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
@@ -1423,7 +1423,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
   // Check all operands are identical immediates.
   int64_t ImmVal = 0;
   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
-    auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
+    auto VRegAndVal =
+        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
     if (!VRegAndVal)
       return None;
 
@@ -1438,7 +1439,8 @@ static Optional<int64_t> getVectorShiftImm(Register Reg,
 
 /// Matches and returns the shift immediate value for a SHL instruction given
 /// a shift operand.
-static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
+static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
+                                         MachineRegisterInfo &MRI) {
   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
   if (!ShiftImm)
     return None;
@@ -1498,6 +1500,8 @@ bool AArch64InstructionSelector::selectVectorSHL(
     Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
   } else if (Ty == LLT::vector(16, 8)) {
     Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
+  } else if (Ty == LLT::vector(8, 8)) {
+    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
   } else {
     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
     return false;
@@ -1557,6 +1561,9 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
   } else if (Ty == LLT::vector(16, 8)) {
     Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
     NegOpc = AArch64::NEGv8i16;
+  } else if (Ty == LLT::vector(8, 8)) {
+    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
+    NegOpc = AArch64::NEGv8i8;
   } else {
     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
     return false;
@@ -1603,8 +1610,9 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
   return true;
 }
 
-void AArch64InstructionSelector::materializeLargeCMVal(
-    MachineInstr &I, const Value *V, unsigned OpFlags) const {
+void AArch64InstructionSelector::materializeLargeCMVal(MachineInstr &I,
+                                                       const Value *V,
+                                                       unsigned OpFlags) const {
   MachineBasicBlock &MBB = *I.getParent();
   MachineFunction &MF = *MBB.getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1635,8 +1643,8 @@ void AArch64InstructionSelector::materializeLargeCMVal(
     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
     return DstReg;
   };
-  Register DstReg = BuildMovK(MovZ.getReg(0),
-                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
+  Register DstReg =
+      BuildMovK(MovZ.getReg(0), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
   return;
@@ -1707,8 +1715,8 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
 /// because the selector works bottom up, uses before defs. By the time we
 /// end up trying to select a G_PTR_ADD, we should have already attempted to
 /// fold this into addressing modes and were therefore unsuccessful.
-bool AArch64InstructionSelector::convertPtrAddToAdd(
-    MachineInstr &I, MachineRegisterInfo &MRI) {
+bool AArch64InstructionSelector::convertPtrAddToAdd(MachineInstr &I,
+                                                    MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
   Register DstReg = I.getOperand(0).getReg();
   Register AddOp1Reg = I.getOperand(1).getReg();
@@ -1886,7 +1894,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
     // Certain non-generic instructions also need some special handling.
 
-    if (Opcode ==  TargetOpcode::LOAD_STACK_GUARD)
+    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
 
     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
@@ -1894,10 +1902,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       const LLT DefTy = MRI.getType(DefReg);
 
       const RegClassOrRegBank &RegClassOrBank =
-        MRI.getRegClassOrRegBank(DefReg);
+          MRI.getRegClassOrRegBank(DefReg);
 
-      const TargetRegisterClass *DefRC
-        = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+      const TargetRegisterClass *DefRC =
+          RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
       if (!DefRC) {
         if (!DefTy.isValid()) {
           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
@@ -1922,7 +1930,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     return true;
   }
 
-
   if (I.getNumOperands() != I.getNumExplicitOperands()) {
     LLVM_DEBUG(
         dbgs() << "Generic instruction has unexpected implicit operands\n");
@@ -1998,10 +2005,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
                      .addUse(CondReg)
                      .addImm(1);
       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
-      auto Bcc =
-          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
-              .addImm(AArch64CC::EQ)
-              .addMBB(DestMBB);
+      auto Bcc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
+                     .addImm(AArch64CC::EQ)
+                     .addMBB(DestMBB);
 
       I.eraseFromParent();
       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
@@ -2375,8 +2381,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       if (NewOpc == I.getOpcode())
         return nullptr;
       // Check if we can fold anything into the addressing mode.
-      auto AddrModeFns =
-          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+      auto AddrModeFns = selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
       if (!AddrModeFns) {
         // Can't fold anything. Use the original instruction.
         I.setDesc(TII.get(NewOpc));
@@ -2536,10 +2541,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     // to the G_UADDO. CSINC increments the result when the predicate is false,
     // so to get the increment when it's true, we need to use the inverse. In
     // this case, we want to increment when carry is set.
-    auto CsetMI = MIRBuilder
-                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
-                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
-                      .addImm(getInvertedCondCode(AArch64CC::HS));
+    auto CsetMI =
+        MIRBuilder
+            .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+                        {Register(AArch64::WZR), Register(AArch64::WZR)})
+            .addImm(getInvertedCondCode(AArch64CC::HS));
     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
     I.eraseFromParent();
     return true;
@@ -2775,14 +2781,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
       }
 
       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
-                             {DefReg}, {SrcReg})
-                  .addImm(0)
-                  .addImm(SrcSize - 1);
+                            {DefReg}, {SrcReg})
+                 .addImm(0)
+                 .addImm(SrcSize - 1);
     } else if (DstSize <= 32) {
       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
-                             {DefReg}, {SrcReg})
-                  .addImm(0)
-                  .addImm(SrcSize - 1);
+                            {DefReg}, {SrcReg})
+                 .addImm(0)
+                 .addImm(SrcSize - 1);
     } else {
       return false;
     }
@@ -2999,7 +3005,7 @@ bool AArch64InstructionSelector::selectJumpTable(
   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
   MachineIRBuilder MIB(I);
   auto MovMI =
-    MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
+      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
   I.eraseFromParent();
@@ -3226,62 +3232,54 @@ bool AArch64InstructionSelector::selectVectorICmp(
   // tablegen selector.
 
   static const unsigned OpcTable[4][4][9] = {
-      {
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
-           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
-           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
-          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
-           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
-           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
-      },
-      {
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
-           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
-           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
-          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
-           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
-           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */}
-      },
-      {
-          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
-           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
-           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
-          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
-           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
-           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */}
-      },
-      {
-          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
-           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
-           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */},
-          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
-           0 /* invalid */}
-      },
+      {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+        AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+        AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+       {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+        AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+        AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}},
+      {{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+        AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+        AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+       {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+        AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+        AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */}},
+      {{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+        AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+        AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+       {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+        AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+        AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */}},
+      {{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+        AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+        AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */},
+       {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+        0 /* invalid */}},
   };
   unsigned EltIdx = Log2_32(SrcEltSize / 8);
   unsigned NumEltsIdx = Log2_32(NumElts / 2);
@@ -3400,11 +3398,11 @@ bool AArch64InstructionSelector::selectMergeValues(
   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
   // Need to anyext the second scalar before we can use bfm
   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
-                                    TII.get(TargetOpcode::SUBREG_TO_REG))
-                                .addDef(SubToRegDef2)
-                                .addImm(0)
-                                .addUse(I.getOperand(2).getReg())
-                                .addImm(AArch64::sub_32);
+                                     TII.get(TargetOpcode::SUBREG_TO_REG))
+                                 .addDef(SubToRegDef2)
+                                 .addImm(0)
+                                 .addUse(I.getOperand(2).getReg())
+                                 .addImm(AArch64::sub_32);
   MachineInstr &BFM =
       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
            .addDef(I.getOperand(0).getReg())
@@ -3732,10 +3730,11 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     break;
   case 8:
-    LoadMI = &*MIRBuilder
-                 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
-                 .addConstantPoolIndex(
-                     CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    LoadMI =
+        &*MIRBuilder
+              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
+              .addConstantPoolIndex(CPIdx, 0,
+                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
     break;
   default:
     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
@@ -4084,16 +4083,15 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
   return &I;
 }
 
-MachineInstr *
-AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
-                                     MachineIRBuilder &MIRBuilder) const {
+MachineInstr *AArch64InstructionSelector::emitCSetForICMP(
+    Register DefReg, unsigned Pred, MachineIRBuilder &MIRBuilder) const {
   // CSINC increments the result when the predicate is false. Invert it.
   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
-  auto I =
-      MIRBuilder
-    .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
-          .addImm(InvCC);
+  auto I = MIRBuilder
+               .buildInstr(AArch64::CSINCWr, {DefReg},
+                           {Register(AArch64::WZR), Register(AArch64::WZR)})
+               .addImm(InvCC);
   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
   return &*I;
 }
@@ -4291,8 +4289,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
     if (!ValAndVReg || ValAndVReg->Value != 0)
       return nullptr;
 
-    return emitTST(LHSDef->getOperand(1),
-                   LHSDef->getOperand(2), MIRBuilder);
+    return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
   }
 
   return nullptr;
@@ -4971,7 +4968,9 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
         MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
         MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
       } else {
-        MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1);
+        MIRBuilder
+            .buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
+            .addImm(1);
         MIRBuilder.buildInstr(AArch64::XPACLRI);
         MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
       }
@@ -5129,9 +5128,11 @@ static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
 }
 
 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectExtendedSHL(
-    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
-    unsigned SizeInBytes, bool WantsExt) const {
+AArch64InstructionSelector::selectExtendedSHL(MachineOperand &Root,
+                                              MachineOperand &Base,
+                                              MachineOperand &Offset,
+                                              unsigned SizeInBytes,
+                                              bool WantsExt) const {
   assert(Base.isReg() && "Expected base to be a register operand");
   assert(Offset.isReg() && "Expected offset to be a register operand");
 
@@ -5270,8 +5271,8 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
 ///
 /// Where x2 is the base register, and x3 is an offset register.
 ///
-/// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
-/// this will do so. Otherwise, it will return None.
+/// When possible (or profitable) to fold a G_PTR_ADD into the address
+/// calculation, this will do so. Otherwise, it will return None.
 InstructionSelector::ComplexRendererFns
 AArch64InstructionSelector::selectAddrModeRegisterOffset(
     MachineOperand &Root) const {
@@ -5337,8 +5338,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
 
     // Skip immediates that can be selected in the load/store addresing
     // mode.
-    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
-        ImmOff < (0x1000 << Scale))
+    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale))
       return None;
 
     // Helper lambda to decide whether or not it is preferable to emit an add.
@@ -5488,9 +5488,8 @@ AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
 }
 
 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
-                                                 unsigned Size,
-                                                 MachineRegisterInfo &MRI) const {
+AArch64InstructionSelector::tryFoldAddLowIntoImm(
+    MachineInstr &RootDef, unsigned Size, MachineRegisterInfo &MRI) const {
   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
     return None;
   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
@@ -5498,7 +5497,8 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
     return None;
 
   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
-  // TODO: Need to check GV's offset % size if doing offset folding into globals.
+  // TODO: Need to check GV's offset % size if doing offset folding into
+  // globals.
   assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
   auto GV = Adrp.getOperand(1).getGlobal();
   if (GV->isThreadLocal())
@@ -5755,11 +5755,10 @@ Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
   assert(SubReg && "Couldn't determine subregister?");
 
   // Build the SUBREG_TO_REG and return the new, widened register.
-  auto SubRegToReg =
-      MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
-          .addImm(0)
-          .addUse(Reg)
-          .addImm(SubReg);
+  auto SubRegToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
+                         .addImm(0)
+                         .addUse(Reg)
+                         .addImm(SubReg);
   constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
   return SubRegToReg.getReg(0);
 }
@@ -5843,8 +5842,9 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
   MIB.addImm(CstVal.getValue());
 }
 
-void AArch64InstructionSelector::renderLogicalImm32(
-  MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+void AArch64InstructionSelector::renderLogicalImm32(MachineInstrBuilder &MIB,
+                                                    const MachineInstr &I,
+                                                    int OpIdx) const {
   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
          "Expected G_CONSTANT");
   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
@@ -5852,8 +5852,9 @@ void AArch64InstructionSelector::renderLogicalImm32(
   MIB.addImm(Enc);
 }
 
-void AArch64InstructionSelector::renderLogicalImm64(
-  MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
+void AArch64InstructionSelector::renderLogicalImm64(MachineInstrBuilder &MIB,
+                                                    const MachineInstr &I,
+                                                    int OpIdx) const {
   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
          "Expected G_CONSTANT");
   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
@@ -5890,7 +5891,6 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
   }
 }
 
-
 // Perform fixups on the given PHI instruction's operands to force them all
 // to be the same as the destination regbank.
 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -5986,4 +5986,4 @@ createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                  AArch64RegisterBankInfo &RBI) {
   return new AArch64InstructionSelector(TM, Subtarget, RBI);
 }
-}
+} // namespace llvm
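
A note on the LLT notation in the hunks above: LLT::vector(8, 8) is
GlobalISel's low-level type for an 8-element vector of 8-bit scalars, i.e. the
<8 x s8> of the commit title (a 64-bit D-register value), while
LLT::vector(16, 8) is the 128-bit <16 x s8> Q-register case that was already
handled. A standalone C++ sketch, assuming the LowLevelTypeImpl.h header
location used around this commit's era:

    #include "llvm/Support/LowLevelTypeImpl.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      LLT V8S8 = LLT::vector(8, 8);   // <8 x s8>: the case this commit adds
      LLT V16S8 = LLT::vector(16, 8); // <16 x s8>: previously handled
      assert(V8S8.isVector() && V8S8.getNumElements() == 8);
      assert(V8S8.getScalarSizeInBits() == 8 && V8S8.getSizeInBits() == 64);
      assert(V16S8.getSizeInBits() == 128);
    }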

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
index 8a28012766ef..071af05b0424 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
@@ -431,3 +431,46 @@ body:             |
     $q0 = COPY %3(<4 x s32>)
     $q1 = COPY %4(<4 x s32>)
 ...
+---
+name: test_shl_v8s8
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_shl_v8s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[SHL:%[0-9]+]]:_(<8 x s8>) = G_SHL [[COPY]], [[COPY1]](<8 x s8>)
+    ; CHECK: $d0 = COPY [[SHL]](<8 x s8>)
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_SHL %0, %1
+    $d0 = COPY %2
+...
+---
+name: test_ashr_v8s8
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_ashr_v8s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[ASHR:%[0-9]+]]:_(<8 x s8>) = G_ASHR [[COPY]], [[COPY1]](<8 x s8>)
+    ; CHECK: $d0 = COPY [[ASHR]](<8 x s8>)
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_ASHR %0, %1
+    $d0 = COPY %2
+...
+
+---
+name: test_lshr_v8s8
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_lshr_v8s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[LSHR:%[0-9]+]]:_(<8 x s8>) = G_LSHR [[COPY]], [[COPY1]](<8 x s8>)
+    ; CHECK: $d0 = COPY [[LSHR]](<8 x s8>)
+    %0:_(<8 x s8>) = COPY $d0
+    %1:_(<8 x s8>) = COPY $d1
+    %2:_(<8 x s8>) = G_LSHR %0, %1
+    $d0 = COPY %2
+...
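
For completeness: this diff does not include legalize-shift.mir's RUN line,
which is what drives the CHECK lines above. A typical shape for a GlobalISel
legalizer MIR test, offered only as an assumed illustration, is:

    # Illustrative RUN line; the actual one at the top of legalize-shift.mir
    # is not shown in this diff.
    # RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s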


        

