[llvm] 2262393 - [AArch64][GlobalISel] Port some AArch64 target specific MUL combines from SDAG.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 10 22:21:31 PST 2020


Author: Amara Emerson
Date: 2020-11-10T22:21:13-08:00
New Revision: 22623930903dd7922a0ee06d4f43fc74e0053e3d

URL: https://github.com/llvm/llvm-project/commit/22623930903dd7922a0ee06d4f43fc74e0053e3d
DIFF: https://github.com/llvm/llvm-project/commit/22623930903dd7922a0ee06d4f43fc74e0053e3d.diff

LOG: [AArch64][GlobalISel] Port some AArch64 target specific MUL combines from SDAG.

These do things like turn a multiply of a pow-2+1 into a shift and an add,
which is a common pattern that pops up, and is universally better than expensive
madd instructions with a constant.

I've added check lines to an existing codegen test since the code being ported
is almost identical; however, the mul by negative pow2 constant tests don't generate
the same code because we're missing some generic G_MUL combines still.

Differential Revision: https://reviews.llvm.org/D91125

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
    llvm/test/CodeGen/AArch64/mul_pow2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 8cbf5931390e..b1e714653f46 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -111,6 +111,14 @@ def extractvecelt_pairwise_add : GICombineRule<
   (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
+def mul_const_matchdata : GIDefMatchData<"std::function<void(MachineIRBuilder&, Register)>">;
+def mul_const : GICombineRule<
+  (defs root:$root, mul_const_matchdata:$matchinfo),
+  (match (wip_match_opcode G_MUL):$root,
+          [{ return matchAArch64MulConstCombine(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyAArch64MulConstCombine(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -128,6 +136,7 @@ def AArch64PostLegalizerCombinerHelper
                         sext_trunc_sextload,
                         hoist_logic_op_with_same_opcode_hands,
                         redundant_and, xor_of_and_with_same_reg,
-                        extractvecelt_pairwise_add, redundant_or]> {
+                        extractvecelt_pairwise_add, redundant_or,
+                        mul_const]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 17520ded4ba7..ce298c766e41 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
@@ -104,6 +105,138 @@ bool applyExtractVecEltPairwiseAdd(
   return true;
 }
 
+static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
+  // TODO: check if extended build vector as well.
+  unsigned Opc = MRI.getVRegDef(R)->getOpcode();
+  return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
+}
+
+static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
+  // TODO: check if extended build vector as well.
+  return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
+}
+
+bool matchAArch64MulConstCombine(
+    MachineInstr &MI, MachineRegisterInfo &MRI,
+    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+  assert(MI.getOpcode() == TargetOpcode::G_MUL);
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(LHS);
+
+  // The below optimizations require a constant RHS.
+  auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (!Const)
+    return false;
+
+  const APInt &ConstValue = APInt(Ty.getSizeInBits(), Const->Value, true);
+  // The following code is ported from AArch64ISelLowering.
+  // Multiplication of a power of two plus/minus one can be done more
+  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
+  // future CPUs have a cheaper MADD instruction, this may need to be
+  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
+  // 64-bit is 5 cycles, so this is always a win.
+  // More aggressively, some multiplications N0 * C can be lowered to
+  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
+  // e.g. 6=3*2=(2+1)*2.
+  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
+  // which equals to (1+2)*16-(1+2).
+  // TrailingZeroes is used to test if the mul can be lowered to
+  // shift+add+shift.
+  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+  if (TrailingZeroes) {
+    // Conservatively do not lower to shift+add+shift if the mul might be
+    // folded into smul or umul.
+    if (MRI.hasOneNonDBGUse(LHS) &&
+        (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
+      return false;
+    // Conservatively do not lower to shift+add+shift if the mul might be
+    // folded into madd or msub.
+    if (MRI.hasOneNonDBGUse(Dst)) {
+      MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
+      if (UseMI.getOpcode() == TargetOpcode::G_ADD ||
+          UseMI.getOpcode() == TargetOpcode::G_SUB)
+        return false;
+    }
+  }
+  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
+  // and shift+add+shift.
+  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
+
+  unsigned ShiftAmt, AddSubOpc;
+  // Is the shifted value the LHS operand of the add/sub?
+  bool ShiftValUseIsLHS = true;
+  // Do we need to negate the result?
+  bool NegateResult = false;
+
+  if (ConstValue.isNonNegative()) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    // (mul x, 2^N - 1) => (sub (shl x, N), x)
+    // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
+    APInt SCVMinus1 = ShiftedConstValue - 1;
+    APInt CVPlus1 = ConstValue + 1;
+    if (SCVMinus1.isPowerOf2()) {
+      ShiftAmt = SCVMinus1.logBase2();
+      AddSubOpc = TargetOpcode::G_ADD;
+    } else if (CVPlus1.isPowerOf2()) {
+      ShiftAmt = CVPlus1.logBase2();
+      AddSubOpc = TargetOpcode::G_SUB;
+    } else
+      return false;
+  } else {
+    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+    // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+    APInt CVNegPlus1 = -ConstValue + 1;
+    APInt CVNegMinus1 = -ConstValue - 1;
+    if (CVNegPlus1.isPowerOf2()) {
+      ShiftAmt = CVNegPlus1.logBase2();
+      AddSubOpc = TargetOpcode::G_SUB;
+      ShiftValUseIsLHS = false;
+    } else if (CVNegMinus1.isPowerOf2()) {
+      ShiftAmt = CVNegMinus1.logBase2();
+      AddSubOpc = TargetOpcode::G_ADD;
+      NegateResult = true;
+    } else
+      return false;
+  }
+
+  if (NegateResult && TrailingZeroes)
+    return false;
+
+  ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
+    auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
+    auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
+
+    Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
+    Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
+    auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
+    assert(!(NegateResult && TrailingZeroes) &&
+           "NegateResult and TrailingZeroes cannot both be true for now.");
+    // Negate the result.
+    if (NegateResult) {
+      B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
+      return;
+    }
+    // Shift the result.
+    if (TrailingZeroes) {
+      B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
+      return;
+    }
+    B.buildCopy(DstReg, Res.getReg(0));
+  };
+  return true;
+}
+
+bool applyAArch64MulConstCombine(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
+  B.setInstrAndDebugLoc(MI);
+  ApplyFn(B, MI.getOperand(0).getReg());
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

diff  --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
index 80a7b7200806..59ac56f34aa2 100644
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-eabi | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL
 
 ; Convert mul x, pow2 to shift.
 ; Convert mul x, pow2 +/- 1 to shift + add/sub.
@@ -6,32 +8,60 @@
 ; Lowering other positive constants are not supported yet.
 
 define i32 @test2(i32 %x) {
-; CHECK-LABEL: test2
-; CHECK: lsl w0, w0, #1
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #1
+; GISEL-NEXT:    ret
 
   %mul = shl nsw i32 %x, 1
   ret i32 %mul
 }
 
 define i32 @test3(i32 %x) {
-; CHECK-LABEL: test3
-; CHECK: add w0, w0, w0, lsl #1
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, w0, lsl #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w0, w0, w0, lsl #1
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 3
   ret i32 %mul
 }
 
 define i32 @test4(i32 %x) {
-; CHECK-LABEL: test4
-; CHECK: lsl w0, w0, #2
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #2
+; GISEL-NEXT:    ret
 
   %mul = shl nsw i32 %x, 2
   ret i32 %mul
 }
 
 define i32 @test5(i32 %x) {
-; CHECK-LABEL: test5
-; CHECK: add w0, w0, w0, lsl #2
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, w0, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test5:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w0, w0, w0, lsl #2
+; GISEL-NEXT:    ret
 
 
   %mul = mul nsw i32 %x, 5
@@ -39,200 +69,386 @@ define i32 @test5(i32 %x) {
 }
 
 define i32 @test6_32b(i32 %x) {
-; CHECK-LABEL: test6
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
-; CHECK: lsl w0, {{w[0-9]+}}, #1
-
-  %mul = mul nsw i32 %x, 6 
+; CHECK-LABEL: test6_32b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #1
+; CHECK-NEXT:    lsl w0, w8, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_32b:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #1
+; GISEL-NEXT:    lsl w0, w8, #1
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 6
   ret i32 %mul
 }
 
 define i64 @test6_64b(i64 %x) {
-; CHECK-LABEL: test6_64b
-; CHECK: add {{x[0-9]+}}, x0, x0, lsl #1
-; CHECK: lsl x0, {{x[0-9]+}}, #1
-
-  %mul = mul nsw i64 %x, 6 
+; CHECK-LABEL: test6_64b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, x0, lsl #1
+; CHECK-NEXT:    lsl x0, x8, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_64b:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add x8, x0, x0, lsl #1
+; GISEL-NEXT:    lsl x0, x8, #1
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i64 %x, 6
   ret i64 %mul
 }
 
-; mul that appears together with add, sub, s(z)ext is not supported to be 
+; mul that appears together with add, sub, s(z)ext is not supported to be
 ; converted to the combination of lsl, add/sub yet.
 define i64 @test6_umull(i32 %x) {
-; CHECK-LABEL: test6_umull
-; CHECK: umull x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_umull:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umull x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umull:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umull x0, w0, w8
+; GISEL-NEXT:    ret
 
   %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   ret i64 %mul
 }
 
 define i64 @test6_smull(i32 %x) {
-; CHECK-LABEL: test6_smull
-; CHECK: smull x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_smull:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smull x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smull:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smull x0, w0, w8
+; GISEL-NEXT:    ret
 
   %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   ret i64 %mul
 }
 
 define i32 @test6_madd(i32 %x, i32 %y) {
-; CHECK-LABEL: test6_madd
-; CHECK: madd w0, w0, {{w[0-9]+}}, w1 
-
-  %mul = mul nsw i32 %x, 6 
+; CHECK-LABEL: test6_madd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    madd w0, w0, w8, w1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_madd:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    madd w0, w0, w8, w1
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 6
   %add = add i32 %mul, %y
   ret i32 %add
 }
 
 define i32 @test6_msub(i32 %x, i32 %y) {
-; CHECK-LABEL: test6_msub
-; CHECK: msub w0, w0, {{w[0-9]+}}, w1 
-
-  %mul = mul nsw i32 %x, 6 
+; CHECK-LABEL: test6_msub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    msub w0, w0, w8, w1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_msub:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    msub w0, w0, w8, w1
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 6
   %sub = sub i32 %y, %mul
   ret i32 %sub
 }
 
 define i64 @test6_umaddl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_umaddl
-; CHECK: umaddl x0, w0, {{w[0-9]+}}, x1 
+; CHECK-LABEL: test6_umaddl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umaddl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umaddl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umaddl x0, w0, w8, x1
+; GISEL-NEXT:    ret
 
   %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   %add = add i64 %mul, %y
   ret i64 %add
 }
 
 define i64 @test6_smaddl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_smaddl
-; CHECK: smaddl x0, w0, {{w[0-9]+}}, x1
+; CHECK-LABEL: test6_smaddl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smaddl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smaddl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smaddl x0, w0, w8, x1
+; GISEL-NEXT:    ret
 
   %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   %add = add i64 %mul, %y
   ret i64 %add
 }
 
 define i64 @test6_umsubl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_umsubl
-; CHECK: umsubl x0, w0, {{w[0-9]+}}, x1
+; CHECK-LABEL: test6_umsubl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umsubl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umsubl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umsubl x0, w0, w8, x1
+; GISEL-NEXT:    ret
 
   %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   %sub = sub i64 %y, %mul
   ret i64 %sub
 }
 
 define i64 @test6_smsubl(i32 %x, i64 %y) {
-; CHECK-LABEL: test6_smsubl
-; CHECK: smsubl x0, w0, {{w[0-9]+}}, x1 
+; CHECK-LABEL: test6_smsubl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smsubl x0, w0, w8, x1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smsubl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smsubl x0, w0, w8, x1
+; GISEL-NEXT:    ret
 
   %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   %sub = sub i64 %y, %mul
   ret i64 %sub
 }
 
 define i64 @test6_umnegl(i32 %x) {
-; CHECK-LABEL: test6_umnegl
-; CHECK: umnegl x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_umnegl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    umnegl x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_umnegl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    umnegl x0, w0, w8
+; GISEL-NEXT:    ret
 
   %ext = zext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   %sub = sub i64 0, %mul
   ret i64 %sub
 }
 
 define i64 @test6_smnegl(i32 %x) {
-; CHECK-LABEL: test6_smnegl
-; CHECK: smnegl x0, w0, {{w[0-9]+}} 
+; CHECK-LABEL: test6_smnegl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    smnegl x0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test6_smnegl:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #6
+; GISEL-NEXT:    smnegl x0, w0, w8
+; GISEL-NEXT:    ret
 
   %ext = sext i32 %x to i64
-  %mul = mul nsw i64 %ext, 6 
+  %mul = mul nsw i64 %ext, 6
   %sub = sub i64 0, %mul
   ret i64 %sub
 }
 
 define i32 @test7(i32 %x) {
-; CHECK-LABEL: test7
-; CHECK: lsl {{w[0-9]+}}, w0, #3
-; CHECK: sub w0, {{w[0-9]+}}, w0
+; CHECK-LABEL: test7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w8, w0, #3
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test7:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w8, w0, #3
+; GISEL-NEXT:    sub w0, w8, w0
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 7
   ret i32 %mul
 }
 
 define i32 @test8(i32 %x) {
-; CHECK-LABEL: test8
-; CHECK: lsl w0, w0, #3
+; CHECK-LABEL: test8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #3
+; GISEL-NEXT:    ret
 
   %mul = shl nsw i32 %x, 3
   ret i32 %mul
 }
 
 define i32 @test9(i32 %x) {
-; CHECK-LABEL: test9
-; CHECK: add w0, w0, w0, lsl #3
-
-  %mul = mul nsw i32 %x, 9 
+; CHECK-LABEL: test9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, w0, lsl #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test9:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w0, w0, w0, lsl #3
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 9
   ret i32 %mul
 }
 
 define i32 @test10(i32 %x) {
-; CHECK-LABEL: test10
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
-; CHECK: lsl w0, {{w[0-9]+}}, #1
+; CHECK-LABEL: test10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #2
+; CHECK-NEXT:    lsl w0, w8, #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test10:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #2
+; GISEL-NEXT:    lsl w0, w8, #1
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 10
   ret i32 %mul
 }
 
 define i32 @test11(i32 %x) {
-; CHECK-LABEL: test11
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: test11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #11
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test11:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #11
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 11
   ret i32 %mul
 }
 
 define i32 @test12(i32 %x) {
-; CHECK-LABEL: test12
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
-; CHECK: lsl w0, {{w[0-9]+}}, #2
+; CHECK-LABEL: test12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #1
+; CHECK-NEXT:    lsl w0, w8, #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test12:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #1
+; GISEL-NEXT:    lsl w0, w8, #2
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 12
   ret i32 %mul
 }
 
 define i32 @test13(i32 %x) {
-; CHECK-LABEL: test13
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: test13:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #13
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test13:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #13
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 13
   ret i32 %mul
 }
 
 define i32 @test14(i32 %x) {
-; CHECK-LABEL: test14
-; CHECK: mul w0, w0, {{w[0-9]+}}
-
-  %mul = mul nsw i32 %x, 14 
+; CHECK-LABEL: test14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #14
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test14:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #14
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
+
+  %mul = mul nsw i32 %x, 14
   ret i32 %mul
 }
 
 define i32 @test15(i32 %x) {
-; CHECK-LABEL: test15
-; CHECK: lsl {{w[0-9]+}}, w0, #4
-; CHECK: sub w0, {{w[0-9]+}}, w0
+; CHECK-LABEL: test15:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w8, w0, #4
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test15:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w8, w0, #4
+; GISEL-NEXT:    sub w0, w8, w0
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 15
   ret i32 %mul
 }
 
 define i32 @test16(i32 %x) {
-; CHECK-LABEL: test16
-; CHECK: lsl w0, w0, #4
+; CHECK-LABEL: test16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w0, w0, #4
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    lsl w0, w0, #4
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, 16
   ret i32 %mul
@@ -243,120 +459,243 @@ define i32 @test16(i32 %x) {
 ; Lowering other negative constants are not supported yet.
 
 define i32 @ntest2(i32 %x) {
-; CHECK-LABEL: ntest2
-; CHECK: neg w0, w0, lsl #1
+; CHECK-LABEL: ntest2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest2:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-2
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -2
   ret i32 %mul
 }
 
 define i32 @ntest3(i32 %x) {
-; CHECK-LABEL: ntest3
-; CHECK: sub w0, w0, w0, lsl #2
+; CHECK-LABEL: ntest3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, w0, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest3:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub w0, w0, w0, lsl #2
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -3
   ret i32 %mul
 }
 
 define i32 @ntest4(i32 %x) {
-; CHECK-LABEL: ntest4
-; CHECK:neg w0, w0, lsl #2
+; CHECK-LABEL: ntest4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest4:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-4
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -4
   ret i32 %mul
 }
 
 define i32 @ntest5(i32 %x) {
-; CHECK-LABEL: ntest5
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #2
-; CHECK: neg w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #2
+; CHECK-NEXT:    neg w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest5:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #2
+; GISEL-NEXT:    neg w0, w8
+; GISEL-NEXT:    ret
   %mul = mul nsw i32 %x, -5
   ret i32 %mul
 }
 
 define i32 @ntest6(i32 %x) {
-; CHECK-LABEL: ntest6
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-6
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest6:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-6
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -6
   ret i32 %mul
 }
 
 define i32 @ntest7(i32 %x) {
-; CHECK-LABEL: ntest7
-; CHECK: sub w0, w0, w0, lsl #3
+; CHECK-LABEL: ntest7:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, w0, lsl #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest7:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub w0, w0, w0, lsl #3
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -7
   ret i32 %mul
 }
 
 define i32 @ntest8(i32 %x) {
-; CHECK-LABEL: ntest8
-; CHECK: neg w0, w0, lsl #3
+; CHECK-LABEL: ntest8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #3
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-8
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -8
   ret i32 %mul
 }
 
 define i32 @ntest9(i32 %x) {
-; CHECK-LABEL: ntest9
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #3
-; CHECK: neg w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w0, lsl #3
+; CHECK-NEXT:    neg w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest9:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    add w8, w0, w0, lsl #3
+; GISEL-NEXT:    neg w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -9
   ret i32 %mul
 }
 
 define i32 @ntest10(i32 %x) {
-; CHECK-LABEL: ntest10
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-10
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest10:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-10
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -10
   ret i32 %mul
 }
 
 define i32 @ntest11(i32 %x) {
-; CHECK-LABEL: ntest11
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-11
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest11:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-11
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -11
   ret i32 %mul
 }
 
 define i32 @ntest12(i32 %x) {
-; CHECK-LABEL: ntest12
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-12
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest12:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-12
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -12
   ret i32 %mul
 }
 
 define i32 @ntest13(i32 %x) {
-; CHECK-LABEL: ntest13
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest13:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-13
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest13:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-13
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
   %mul = mul nsw i32 %x, -13
   ret i32 %mul
 }
 
 define i32 @ntest14(i32 %x) {
-; CHECK-LABEL: ntest14
-; CHECK: mul w0, w0, {{w[0-9]+}}
+; CHECK-LABEL: ntest14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-14
+; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest14:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-14
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -14
   ret i32 %mul
 }
 
 define i32 @ntest15(i32 %x) {
-; CHECK-LABEL: ntest15
-; CHECK: sub w0, w0, w0, lsl #4
+; CHECK-LABEL: ntest15:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, w0, lsl #4
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest15:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub w0, w0, w0, lsl #4
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -15
   ret i32 %mul
 }
 
 define i32 @ntest16(i32 %x) {
-; CHECK-LABEL: ntest16
-; CHECK: neg w0, w0, lsl #4
+; CHECK-LABEL: ntest16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg w0, w0, lsl #4
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: ntest16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov w8, #-16
+; GISEL-NEXT:    mul w0, w0, w8
+; GISEL-NEXT:    ret
 
   %mul = mul nsw i32 %x, -16
   ret i32 %mul


        


More information about the llvm-commits mailing list