[llvm] r370410 - [AArch64][GlobalISel] Select arithmetic extended register patterns

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 14:53:58 PDT 2019


Author: paquette
Date: Thu Aug 29 14:53:58 2019
New Revision: 370410

URL: http://llvm.org/viewvc/llvm-project?rev=370410&view=rev
Log:
[AArch64][GlobalISel] Select arithmetic extended register patterns

This teaches GISel to select patterns which fold an extend plus an optional
shift into the arithmetic extended-register operand. In particular, adds and subs.
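
As a minimal illustration (register names here are only illustrative; the real
coverage is in the addsub_ext.ll changes below), IR of the form

  %ext = zext i32 %val to i64
  %shifted = shl i64 %ext, 2
  %res = add i64 %lhs, %shifted

can now be selected by GISel as a single extended-register instruction:

  add x0, x1, w2, uxtw #2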

Factor out the arith extended register ComplexPatterns in AArch64InstrFormats.td
and create GISel equivalents.

Add GISel equivalents of the corresponding functions in AArch64ISelDAGToDAG:

- `selectArithExtendedRegister`
- `narrowExtendRegIfNeeded`
- `getExtendTypeForInst`

`getExtendTypeForInst` includes the checks needed for loads and stores; these
will be used later for the WRO addressing modes on loads and stores.
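
As a rough summary, the mapping implemented by `getExtendTypeForInst` (see the
full function in the diff below) is:

  G_SEXT / G_SEXT_INREG, 8/16/32-bit source   -> SXTB / SXTH / SXTW
  G_ZEXT / G_ANYEXT,     8/16/32-bit source   -> UXTB / UXTH / UXTW
  G_AND with mask 0xFF / 0xFFFF / 0xFFFFFFFF  -> UXTB / UXTH / UXTW
  anything else                               -> InvalidShiftExtend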

Teach selectCopy to properly handle subregister copies on the same bank in
order to support `narrowExtendRegIfNeeded`. The extended register must be a
GPR32, so we need to support same-bank subregister copies.
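
For example (virtual register names are illustrative), narrowing a 64-bit GPR
down to the GPR32 needed by the extend is now selected as a sub_32 subregister
copy, as seen in the add_and_s32_to_s64 test below:

  %narrow:gpr32all = COPY %wide.sub_32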

Fix a bug in getSubRegForClass which would cause registers in classes like
GPR32common to end up with ssub. The fix is simply to change the check to look
for FPR32 rather than GPR32.

For tests:

- Add select-arith-extended-reg.mir
- Update addsub_ext.ll to include GlobalISel checks

Differential Revision: https://reviews.llvm.org/D66835

Added:
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
    llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td?rev=370410&r1=370409&r2=370410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td Thu Aug 29 14:53:58 2019
@@ -944,6 +944,21 @@ class arith_extended_reg32to64<ValueType
   let MIOperandInfo = (ops GPR32, arith_extend64);
 }
 
+def arith_extended_reg32_i32 : arith_extended_reg32<i32>;
+def gi_arith_extended_reg32_i32 :
+    GIComplexOperandMatcher<s32, "selectArithExtendedRegister">,
+    GIComplexPatternEquiv<arith_extended_reg32_i32>;
+
+def arith_extended_reg32_i64 : arith_extended_reg32<i64>;
+def gi_arith_extended_reg32_i64 :
+    GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
+    GIComplexPatternEquiv<arith_extended_reg32_i64>;
+
+def arith_extended_reg32to64_i64 : arith_extended_reg32to64<i64>;
+def gi_arith_extended_reg32to64_i64 :
+    GIComplexOperandMatcher<s64, "selectArithExtendedRegister">,
+    GIComplexPatternEquiv<arith_extended_reg32to64_i64>;
+
 // Floating-point immediate.
 def fpimm16 : Operand<f16>,
               FPImmLeaf<f16, [{
@@ -2215,11 +2230,11 @@ multiclass AddSub<bit isSub, string mnem
   // Add/Subtract extended register
   let AddedComplexity = 1, hasSideEffects = 0 in {
   def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp,
-                           arith_extended_reg32<i32>, mnemonic, OpNode> {
+                           arith_extended_reg32_i32, mnemonic, OpNode> {
     let Inst{31} = 0;
   }
   def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp,
-                           arith_extended_reg32to64<i64>, mnemonic, OpNode> {
+                           arith_extended_reg32to64_i64, mnemonic, OpNode> {
     let Inst{31} = 1;
   }
   }
@@ -2289,11 +2304,11 @@ multiclass AddSubS<bit isSub, string mne
   // Add/Subtract extended register
   let AddedComplexity = 1 in {
   def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp,
-                           arith_extended_reg32<i32>, mnemonic, OpNode> {
+                           arith_extended_reg32_i32, mnemonic, OpNode> {
     let Inst{31} = 0;
   }
   def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp,
-                           arith_extended_reg32<i64>, mnemonic, OpNode> {
+                           arith_extended_reg32_i64, mnemonic, OpNode> {
     let Inst{31} = 1;
   }
   }

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=370410&r1=370409&r2=370410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Aug 29 14:53:58 2019
@@ -1035,10 +1035,10 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_
 def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
           (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
 let AddedComplexity = 1 in {
-def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
-          (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
-def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
-          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
+          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
+def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
+          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
 }
 
 // Because of the immediate format for add/sub-imm instructions, the

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=370410&r1=370409&r2=370410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Thu Aug 29 14:53:58 2019
@@ -227,6 +227,14 @@ private:
     return selectShiftedRegister(Root);
   }
 
+  /// Instructions that accept extend modifiers like UXTW expect the register
+  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
+  /// subregister copy if necessary. Return either ExtReg, or the result of the
+  /// new copy.
+  Register narrowExtendRegIfNeeded(Register ExtReg,
+                                             MachineIRBuilder &MIB) const;
+  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
+
   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const;
   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const;
@@ -246,6 +254,11 @@ private:
   /// Return true if \p MI is a load or store of \p NumBytes bytes.
   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
 
+  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
+  /// register zeroed out. In other words, the result of MI has been explicitly
+  /// zero extended.
+  bool isDef32(const MachineInstr &MI) const;
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -363,7 +376,7 @@ static bool getSubRegForClass(const Targ
     SubReg = AArch64::hsub;
     break;
   case 32:
-    if (RC == &AArch64::GPR32RegClass)
+    if (RC != &AArch64::FPR32RegClass)
       SubReg = AArch64::sub_32;
     else
       SubReg = AArch64::ssub;
@@ -676,35 +689,35 @@ static bool selectCopy(MachineInstr &I,
       return false;
     }
 
-    // Is this a cross-bank copy?
-    if (DstRegBank.getID() != SrcRegBank.getID()) {
-      // If we're doing a cross-bank copy on different-sized registers, we need
-      // to do a bit more work.
-      unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
-      unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
-
-      if (SrcSize > DstSize) {
-        // We're doing a cross-bank copy into a smaller register. We need a
-        // subregister copy. First, get a register class that's on the same bank
-        // as the destination, but the same size as the source.
-        const TargetRegisterClass *SubregRC =
-            getMinClassForRegBank(DstRegBank, SrcSize, true);
-        assert(SubregRC && "Didn't get a register class for subreg?");
-
-        // Get the appropriate subregister for the destination.
-        unsigned SubReg = 0;
-        if (!getSubRegForClass(DstRC, TRI, SubReg)) {
-          LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
-          return false;
-        }
+    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
+    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
 
-        // Now, insert a subregister copy using the new register class.
-        selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
-        return CheckCopy();
+    // If we're doing a cross-bank copy on different-sized registers, we need
+    // to do a bit more work.
+    if (SrcSize > DstSize) {
+      // We're doing a cross-bank copy into a smaller register. We need a
+      // subregister copy. First, get a register class that's on the same bank
+      // as the destination, but the same size as the source.
+      const TargetRegisterClass *SubregRC =
+          getMinClassForRegBank(DstRegBank, SrcSize, true);
+      assert(SubregRC && "Didn't get a register class for subreg?");
+
+      // Get the appropriate subregister for the destination.
+      unsigned SubReg = 0;
+      if (!getSubRegForClass(DstRC, TRI, SubReg)) {
+        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
+        return false;
       }
 
-      else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
-               SrcSize == 16) {
+      // Now, insert a subregister copy using the new register class.
+      selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
+      return CheckCopy();
+    }
+
+    // Is this a cross-bank copy?
+    if (DstRegBank.getID() != SrcRegBank.getID()) {
+      if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
+          SrcSize == 16) {
         // Special case for FPR16 to GPR32.
         // FIXME: This can probably be generalized like the above case.
         Register PromoteReg =
@@ -4472,6 +4485,146 @@ AArch64InstructionSelector::selectShifte
            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
 }
 
+/// Get the correct ShiftExtendType for an extend instruction.
+static AArch64_AM::ShiftExtendType
+getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  unsigned Opc = MI.getOpcode();
+
+  // Handle explicit extend instructions first.
+  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
+    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    assert(Size != 64 && "Extend from 64 bits?");
+    switch (Size) {
+    case 8:
+      return AArch64_AM::SXTB;
+    case 16:
+      return AArch64_AM::SXTH;
+    case 32:
+      return AArch64_AM::SXTW;
+    default:
+      return AArch64_AM::InvalidShiftExtend;
+    }
+  }
+
+  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
+    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    assert(Size != 64 && "Extend from 64 bits?");
+    switch (Size) {
+    case 8:
+      return AArch64_AM::UXTB;
+    case 16:
+      return AArch64_AM::UXTH;
+    case 32:
+      return AArch64_AM::UXTW;
+    default:
+      return AArch64_AM::InvalidShiftExtend;
+    }
+  }
+
+  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
+  // on the RHS.
+  if (Opc != TargetOpcode::G_AND)
+    return AArch64_AM::InvalidShiftExtend;
+
+  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
+  if (!MaybeAndMask)
+    return AArch64_AM::InvalidShiftExtend;
+  uint64_t AndMask = *MaybeAndMask;
+  switch (AndMask) {
+  default:
+    return AArch64_AM::InvalidShiftExtend;
+  case 0xFF:
+    return AArch64_AM::UXTB;
+  case 0xFFFF:
+    return AArch64_AM::UXTH;
+  case 0xFFFFFFFF:
+    return AArch64_AM::UXTW;
+  }
+}
+
+Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
+    Register ExtReg, MachineIRBuilder &MIB) const {
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  if (MRI.getType(ExtReg).getSizeInBits() == 32)
+    return ExtReg;
+
+  // Insert a copy to move ExtReg to GPR32.
+  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+
+  // Select the copy into a subregister copy.
+  selectCopy(*Copy, TII, MRI, TRI, RBI);
+  return Copy.getReg(0);
+}
+
+/// Select an "extended register" operand. This operand folds in an extend
+/// followed by an optional left shift.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectArithExtendedRegister(
+    MachineOperand &Root) const {
+  if (!Root.isReg())
+    return None;
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  uint64_t ShiftVal = 0;
+  Register ExtReg;
+  AArch64_AM::ShiftExtendType Ext;
+  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
+  if (!RootDef)
+    return None;
+
+  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
+    return None;
+
+  // Check if we can fold a shift and an extend.
+  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
+    // Look for a constant on the RHS of the shift.
+    MachineOperand &RHS = RootDef->getOperand(2);
+    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
+    if (!MaybeShiftVal)
+      return None;
+    ShiftVal = *MaybeShiftVal;
+    if (ShiftVal > 4)
+      return None;
+    // Look for a valid extend instruction on the LHS of the shift.
+    MachineOperand &LHS = RootDef->getOperand(1);
+    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
+    if (!ExtDef)
+      return None;
+    Ext = getExtendTypeForInst(*ExtDef, MRI);
+    if (Ext == AArch64_AM::InvalidShiftExtend)
+      return None;
+    ExtReg = ExtDef->getOperand(1).getReg();
+  } else {
+    // Didn't get a shift. Try just folding an extend.
+    Ext = getExtendTypeForInst(*RootDef, MRI);
+    if (Ext == AArch64_AM::InvalidShiftExtend)
+      return None;
+    ExtReg = RootDef->getOperand(1).getReg();
+
+    // If we have a 32 bit instruction which zeroes out the high half of a
+    // register, we get an implicit zero extend for free. Check if we have one.
+    // FIXME: We actually emit the extend right now even though we don't have
+    // to.
+    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
+      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
+      if (ExtInst && isDef32(*ExtInst))
+        return None;
+    }
+  }
+
+  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
+  // copy.
+  MachineIRBuilder MIB(*RootDef);
+  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+           [=](MachineInstrBuilder &MIB) {
+             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
+           }}};
+}
+
 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI) const {
   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
@@ -4506,6 +4659,26 @@ bool AArch64InstructionSelector::isLoadS
   return (*MI.memoperands_begin())->getSize() == NumBytes;
 }
 
+bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
+    return false;
+
+  // Only return true if we know the operation will zero-out the high half of
+  // the 64-bit register. Truncates can be subregister copies, which don't
+  // zero out the high bits. Copies and other copy-like instructions can be
+  // fed by truncates, or could be lowered as subregister copies.
+  switch (MI.getOpcode()) {
+  default:
+    return true;
+  case TargetOpcode::COPY:
+  case TargetOpcode::G_BITCAST:
+  case TargetOpcode::G_TRUNC:
+  case TargetOpcode::G_PHI:
+    return false;
+  }
+}
+
 namespace llvm {
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &TM,

Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir?rev=370410&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir Thu Aug 29 14:53:58 2019
@@ -0,0 +1,634 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            add_sext_s32_to_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x2
+    ; CHECK-LABEL: name: add_sext_s32_to_s64
+    ; CHECK: liveins: $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr64sp = COPY $x2
+    ; CHECK: %res:gpr64sp = ADDXrx %add_lhs, [[COPY]], 48
+    ; CHECK: $x3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x3
+    %1:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %1(s32)
+    %add_lhs:gpr(s64) = COPY $x2
+    %res:gpr(s64) = G_ADD %add_lhs, %ext
+    $x3 = COPY %res(s64)
+    RET_ReallyLR implicit $x3
+...
+---
+name:            add_and_s32_to_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x1, $x2
+    ; CHECK-LABEL: name: add_and_s32_to_s64
+    ; CHECK: liveins: $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+    ; CHECK: %add_lhs:gpr64sp = COPY $x2
+    ; CHECK: %res:gpr64sp = ADDXrx %add_lhs, [[COPY2]], 16
+    ; CHECK: $x3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x3
+    %1:gpr(s64) = COPY $x1
+    %mask:gpr(s64) = G_CONSTANT i64 4294967295 ; 0xffffffff
+    %ext:gpr(s64) = G_AND %1(s64), %mask
+    %add_lhs:gpr(s64) = COPY $x2
+    %res:gpr(s64) = G_ADD %add_lhs, %ext
+    $x3 = COPY %res(s64)
+    RET_ReallyLR implicit $x3
+...
+---
+name:            add_sext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_sext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 40
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_zext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_zext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 8
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s16)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_anyext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_anyext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 8
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s16)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_and_s16_to_s32_uxtb
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_and_s16_to_s32_uxtb
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 0
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_and_s16_to_s32_uxth
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_and_s16_to_s32_uxth
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 8
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 65535 ; 0xffff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_sext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_sext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 32
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s8)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_zext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_zext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 0
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s8)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_anyext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_anyext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 0
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s8)
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_sext_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_sext_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 43
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            add_and_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: add_and_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %add_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 3
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            dont_fold_invalid_mask
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    ; Check that we only fold when we have a supported AND mask.
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: dont_fold_invalid_mask
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %mask:gpr32 = MOVi32imm 42
+    ; CHECK: %ext:gpr32 = ANDWrr [[COPY]], %mask
+    ; CHECK: %add_lhs:gpr32 = COPY $w2
+    ; CHECK: %res:gpr32 = ADDWrr %add_lhs, %ext
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 42
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            dont_fold_invalid_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: dont_fold_invalid_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %ext:gpr32 = SBFMWri %wide_1, 0, 15
+    ; CHECK: %add_lhs:gpr32 = COPY $w2
+    ; CHECK: %res:gpr32 = ADDWrs %add_lhs, %ext, 5
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %imm:gpr(s32) = G_CONSTANT i32 5
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %add_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_ADD %add_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_sext_s32_to_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x2
+    ; CHECK-LABEL: name: sub_sext_s32_to_s64
+    ; CHECK: liveins: $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr64sp = COPY $x2
+    ; CHECK: %res:gpr64 = SUBSXrx %sub_lhs, [[COPY]], 48, implicit-def $nzcv
+    ; CHECK: $x3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x3
+    %1:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %1(s32)
+    %sub_lhs:gpr(s64) = COPY $x2
+    %res:gpr(s64) = G_SUB %sub_lhs, %ext
+    $x3 = COPY %res(s64)
+    RET_ReallyLR implicit $x3
+...
+---
+name:            sub_sext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_sext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 40, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_zext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_zext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 8, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s16)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_anyext_s16_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_anyext_s16_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 8, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s16)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_and_s16_to_s32_uxtb
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_and_s16_to_s32_uxtb
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 0, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_and_s16_to_s32_uxth
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_and_s16_to_s32_uxth
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 8, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 65535 ; 0xffff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+---
+name:            sub_sext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s8)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_zext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_zext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 0, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ZEXT %1(s8)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_anyext_s8_to_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_anyext_s8_to_s32
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 0, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s8) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_ANYEXT %1(s8)
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %ext
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+---
+...
+---
+name:            sub_sext_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_sext_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: %wide_1:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 43, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %wide_1:gpr(s32) = COPY $w1
+    %1:gpr(s16) = G_TRUNC %wide_1
+    %ext:gpr(s32) = G_SEXT %1(s16)
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3
+...
+---
+name:            sub_and_with_shl
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $w2, $x2
+    ; CHECK-LABEL: name: sub_and_with_shl
+    ; CHECK: liveins: $w1, $w2, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: %sub_lhs:gpr32sp = COPY $w2
+    ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 3, implicit-def $nzcv
+    ; CHECK: $w3 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w3
+    %1:gpr(s32) = COPY $w1
+    %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff
+    %ext:gpr(s32) = G_AND %1(s32), %mask
+    %imm:gpr(s32) = G_CONSTANT i32 3
+    %shl:gpr(s32) = G_SHL %ext, %imm
+    %sub_lhs:gpr(s32) = COPY $w2
+    %res:gpr(s32) = G_SUB %sub_lhs, %shl
+    $w3 = COPY %res(s32)
+    RET_ReallyLR implicit $w3

Modified: llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll?rev=370410&r1=370409&r2=370410&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll Thu Aug 29 14:53:58 2019
@@ -1,4 +1,9 @@
-; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -global-isel -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=GISEL
+
+; FIXME: GISel only knows how to handle explicit G_SEXT instructions. So when
+; G_SEXT is lowered to anything else, it won't fold in a sxt*.
+; FIXME: GISel doesn't currently handle folding the addressing mode into a cmp.
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -7,6 +12,7 @@
 
 define void @addsub_i8rhs() minsize {
 ; CHECK-LABEL: addsub_i8rhs:
+; GISEL-LABEL: addsub_i8rhs:
     %val8_tmp = load i8, i8* @var8
     %lhs32 = load i32, i32* @var32
     %lhs64 = load i64, i64* @var64
@@ -20,23 +26,26 @@ define void @addsub_i8rhs() minsize {
     %res32_zext = add i32 %lhs32, %rhs32_zext
     store volatile i32 %res32_zext, i32* @var32
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
 
    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
-
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
 
 ; Zero-extending to 64-bits
     %rhs64_zext = zext i8 %val8 to i64
     %res64_zext = add i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
 
    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
 
 ; Sign-extending to 32-bits
     %rhs32_sext = sext i8 %val8 to i32
@@ -95,23 +104,26 @@ define void @sub_i8rhs() minsize {
     %res32_zext = sub i32 %lhs32, %rhs32_zext
     store volatile i32 %res32_zext, i32* @var32
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
 
    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
-
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
 
 ; Zero-extending to 64-bits
     %rhs64_zext = zext i8 %val8 to i64
     %res64_zext = sub i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
 
    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
 
 ; Sign-extending to 32-bits
     %rhs32_sext = sext i8 %val8 to i32
@@ -140,6 +152,7 @@ define void @sub_i8rhs() minsize {
 
 define void @addsub_i16rhs() minsize {
 ; CHECK-LABEL: addsub_i16rhs:
+; GISEL-LABEL: addsub_i16rhs:
     %val16_tmp = load i16, i16* @var16
     %lhs32 = load i32, i32* @var32
     %lhs64 = load i64, i64* @var64
@@ -153,23 +166,26 @@ define void @addsub_i16rhs() minsize {
     %res32_zext = add i32 %lhs32, %rhs32_zext
     store volatile i32 %res32_zext, i32* @var32
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
 
    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
-
+; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
 
 ; Zero-extending to 64-bits
     %rhs64_zext = zext i16 %val16 to i64
     %res64_zext = add i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
 
    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
 
 ; Sign-extending to 32-bits
     %rhs32_sext = sext i16 %val16 to i32
@@ -215,6 +231,7 @@ end:
 
 define void @sub_i16rhs() minsize {
 ; CHECK-LABEL: sub_i16rhs:
+; GISEL-LABEL: sub_i16rhs:
     %val16_tmp = load i16, i16* @var16
     %lhs32 = load i32, i32* @var32
     %lhs64 = load i64, i64* @var64
@@ -228,23 +245,26 @@ define void @sub_i16rhs() minsize {
     %res32_zext = sub i32 %lhs32, %rhs32_zext
     store volatile i32 %res32_zext, i32* @var32
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
 
    %rhs32_zext_shift = shl i32 %rhs32_zext, 3
    %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
    store volatile i32 %res32_zext_shift, i32* @var32
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
-
+; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
 
 ; Zero-extending to 64-bits
     %rhs64_zext = zext i16 %val16 to i64
     %res64_zext = sub i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
 
    %rhs64_zext_shift = shl i64 %rhs64_zext, 1
    %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
    store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
 
 ; Sign-extending to 32-bits
     %rhs32_sext = sext i16 %val16 to i32
@@ -276,6 +296,7 @@ define void @sub_i16rhs() minsize {
 ; in the face of "add/sub (shifted register)" so I don't intend to.
 define void @addsub_i32rhs(i32 %in32) minsize {
 ; CHECK-LABEL: addsub_i32rhs:
+; GISEL-LABEL: addsub_i32rhs:
     %val32_tmp = load i32, i32* @var32
     %lhs64 = load i64, i64* @var64
 
@@ -285,22 +306,26 @@ define void @addsub_i32rhs(i32 %in32) mi
     %res64_zext = add i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 
     %rhs64_zext2 = zext i32 %val32 to i64
     %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
     %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
 
     %rhs64_sext = sext i32 %val32 to i64
     %res64_sext = add i64 %lhs64, %rhs64_sext
     store volatile i64 %res64_sext, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
 
     %rhs64_sext_shift = shl i64 %rhs64_sext, 2
     %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
     store volatile i64 %res64_sext_shift, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
+; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
 
     ret void
 }
@@ -316,12 +341,14 @@ define void @sub_i32rhs(i32 %in32) minsi
     %res64_zext = sub i64 %lhs64, %rhs64_zext
     store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 
     %rhs64_zext2 = zext i32 %val32 to i64
     %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
     %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
     store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
 
     %rhs64_sext = sext i32 %val32 to i64
     %res64_sext = sub i64 %lhs64, %rhs64_sext
@@ -339,11 +366,15 @@ define void @sub_i32rhs(i32 %in32) minsi
 ; Check that implicit zext from w reg write is used instead of uxtw form of add.
 define i64 @add_fold_uxtw(i32 %x, i64 %y) {
 ; CHECK-LABEL: add_fold_uxtw:
+; GISEL-LABEL: add_fold_uxtw:
 entry:
 ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
+; FIXME: Global ISel produces an unnecessary ubfx here.
   %m = and i32 %x, 3
   %ext = zext i32 %m to i64
 ; CHECK-NEXT: add x0, x1, x[[TMP]]
+; GISEL: add x0, x1, x[[TMP]]
   %ret = add i64 %y, %ext
   ret i64 %ret
 }
@@ -352,11 +383,14 @@ entry:
 ; form of sub and that mov WZR is folded to form a neg instruction.
 define i64 @sub_fold_uxtw_xzr(i32 %x)  {
 ; CHECK-LABEL: sub_fold_uxtw_xzr:
+; GISEL-LABEL: sub_fold_uxtw_xzr:
 entry:
 ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
   %m = and i32 %x, 3
   %ext = zext i32 %m to i64
 ; CHECK-NEXT: neg x0, x[[TMP]]
+; GISEL: negs x0, x[[TMP]]
   %ret = sub i64 0, %ext
   ret i64 %ret
 }
@@ -378,10 +412,13 @@ entry:
 ; form of add, leading to madd selection.
 define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
 ; CHECK-LABEL: madd_fold_uxtw:
+; GISEL-LABEL: madd_fold_uxtw:
 entry:
 ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
   %m = and i32 %x, 3
   %ext = zext i32 %m to i64
+; GISEL: madd x0, x1, x1, x[[TMP]]
 ; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
   %mul = mul i64 %y, %y
   %ret = add i64 %mul, %ext
@@ -408,11 +445,14 @@ entry:
 ; form of add and add of -1 gets selected as sub.
 define i64 @add_imm_fold_uxtw(i32 %x) {
 ; CHECK-LABEL: add_imm_fold_uxtw:
+; GISEL-LABEL: add_imm_fold_uxtw:
 entry:
 ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
   %m = and i32 %x, 3
   %ext = zext i32 %m to i64
 ; CHECK-NEXT: sub x0, x[[TMP]], #1
+; GISEL: subs x0, x[[TMP]], #1
   %ret = add i64 %ext, -1
   ret i64 %ret
 }
@@ -421,12 +461,15 @@ entry:
 ; form of add and add lsl form gets selected.
 define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
 ; CHECK-LABEL: add_lsl_fold_uxtw:
+; GISEL-LABEL: add_lsl_fold_uxtw:
 entry:
 ; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
+; GISEL: orr w[[TMP:[0-9]+]], w0, #0x3
   %m = or i32 %x, 3
   %ext = zext i32 %m to i64
   %shift = shl i64 %y, 3
 ; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
+; GISEL: add x0, x[[TMP]], x1, lsl #3
   %ret = add i64 %ext, %shift
   ret i64 %ret
 }



