[llvm] 62476c7 - Revert "[AArch64] Revive optimize add/sub with immediate through MIPeepholeOpt"

Tue Jan 18 05:17:27 PST 2022

Author: Florian Hahn
Date: 2022-01-18T13:17:02Z
New Revision: 62476c7c14506e7181344a853e0a46981868287d

URL: https://github.com/llvm/llvm-project/commit/62476c7c14506e7181344a853e0a46981868287d
DIFF: https://github.com/llvm/llvm-project/commit/62476c7c14506e7181344a853e0a46981868287d.diff

LOG: Revert "[AArch64] Revive optimize add/sub with immediate through MIPeepholeOpt"

This reverts commit e6698f09929a134bf0f46d9347142b86d8f636a2.

The reverted commit appears to introduce new machine verifier failures when
building the llvm-test-suite with `-mllvm -verify-machineinstrs` enabled:

https://green.lab.llvm.org/green/job/test-suite-verify-machineinstrs-aarch64-O3/11061/

FAILED: MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o
/Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/test-suite-build/tools/timeit --summary MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o.time /Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/compiler/bin/clang -DNDEBUG  -B /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin    -Wno-unused-command-line-argument -mllvm -verify-machineinstrs -O3 -arch arm64 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk   -w -Werror=date-time -DTORONTO -MD -MT MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o -MF MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o.d -o MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o   -c /Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/test-suite/MultiSource/Benchmarks/Olden/health/health.c
*** Bad machine code: Illegal virtual register for instruction ***
- function:    alloc_tree
- basic block: %bb.1 if.else (0x7fc0db8f8bb0)
- instruction: %31:gpr64 = nsw MADDXrrr killed %39:gpr64sp, killed %25:gpr64, $xzr
- operand 1:   killed %39:gpr64sp
Expected a GPR64 register, but got a GPR64sp register
fatal error: error in backend: Found 1 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/compiler/bin/clang -DNDEBUG -B /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin -Wno-unused-command-line-argument -mllvm -verify-machineinstrs -O3 -arch arm64 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -w -Werror=date-time -DTORONTO -MD -MT MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o -MF MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o.d -o MultiSource/Benchmarks/Olden/health/CMakeFiles/health.dir/health.c.o -c /Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/test-suite/MultiSource/Benchmarks/Olden/health/health.c
1.	<eof> parser at end of file
2.	Code generation
3.	Running pass 'Function Pass Manager' on module '/Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/test-suite/MultiSource/Benchmarks/Olden/health/health.c'.
4.	Running pass 'Verify generated machine code' on function '@alloc_tree'
Stack dump without symbol names (ensure you have llvm-symbolizer in your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point to it):
0  clang         0x000000011191896b llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 43
1  clang         0x00000001119179b5 llvm::sys::RunSignalHandlers() + 85
2  clang         0x00000001119180e2 llvm::sys::CleanupOnSignal(unsigned long) + 210
3  clang         0x0000000111849f6a (anonymous namespace)::CrashRecoveryContextImpl::HandleCrash(int, unsigned long) + 106
4  clang         0x0000000111849ee8 llvm::CrashRecoveryContext::HandleExit(int) + 24
5  clang         0x0000000111914acc llvm::sys::Process::Exit(int, bool) + 44
6  clang         0x000000010f4e9be9 LLVMErrorHandler(void*, char const*, bool) + 89
7  clang         0x0000000114eba333 llvm::report_fatal_error(llvm::Twine const&, bool) + 323
8  clang         0x0000000110d8c620 (anonymous namespace)::MachineVerifier::BBInfo::~BBInfo() + 0
9  clang         0x0000000110cdddca llvm::MachineFunctionPass::runOnFunction(llvm::Function&) + 378
10 clang         0x00000001110b0154 llvm::FPPassManager::runOnFunction(llvm::Function&) + 1092
11 clang         0x00000001110b6268 llvm::FPPassManager::runOnModule(llvm::Module&) + 72
12 clang         0x00000001110b074a llvm::legacy::PassManagerImpl::run(llvm::Module&) + 986
13 clang         0x0000000111c20ad4 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, std::__1::unique_ptr<llvm::raw_pwrite_stream, std::__1::default_delete<llvm::raw_pwrite_stream> >) + 3764
14 clang         0x0000000111f6dd31 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) + 1905
15 clang         0x00000001131a28b3 clang::ParseAST(clang::Sema&, bool, bool) + 643
16 clang         0x00000001122b02a4 clang::FrontendAction::Execute() + 84
17 clang         0x000000011222d6a9 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) + 873
18 clang         0x000000011232faf5 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) + 661
19 clang         0x000000010f4e9860 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) + 2544
20 clang         0x000000010f4e7168 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&) + 312
21 clang         0x00000001120ab187 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<llvm::Optional<llvm::StringRef> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, bool*) const::$_1>(long) + 23
22 clang         0x0000000111849eb4 llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) + 228
23 clang         0x00000001120aac24 clang::driver::CC1Command::Execute(llvm::ArrayRef<llvm::Optional<llvm::StringRef> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*, bool*) const + 324
24 clang         0x000000011207b85d clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&) const + 221
25 clang         0x000000011207bdad clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::__1::pair<int, clang::driver::Command const*> >&) const + 125
26 clang         0x0000000112092f7c clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::__1::pair<int, clang::driver::Command const*> >&) + 204
27 clang         0x000000010f4e6977 main + 10375
28 libdyld.dylib 0x00007fff6be90cc9 start + 1
29 libdyld.dylib 0x0000000000000018 start + 18446603338705728336
clang-14: error: clang frontend command failed with exit code 70 (use -v to see invocation)
clang version 14.0.0 (https://github.com/llvm/llvm-project.git c90d136be4e055f1b409f38706d0fe3e2211af08)
Target: arm64-apple-darwin19.5.0
Thread model: posix
InstalledDir: /Users/buildslave/jenkins/workspace/test-suite-verify-machineinstrs-aarch64-O3/compiler/bin
clang-14: note: diagnostic msg:
********************

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
    llvm/test/CodeGen/AArch64/addsub.ll
    llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index a827dcdbc7f51..3e1306eb32972 100644

--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -11,19 +11,12 @@
 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
 //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
 //
-// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
-//    MOVi64imm + ADDXrr ==> ANDXri + ANDXri
-//
-// 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
-//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
-//
 //    The mov pseudo instruction could be expanded to multiple mov instructions
 //    later. In this case, we could try to split the constant  operand of mov
-//    instruction into two immediates which can be directly encoded into
-//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
-//    multiple `mov` + `and/add/sub` instructions.
+//    instruction into two bitmask immediates. It makes two AND instructions
+//    intead of multiple `mov` + `and` instructions.
 //
-// 4. Remove redundant ORRWrs which is generated by zero-extend.
+// 2. Remove redundant ORRWrs which is generated by zero-extend.
 //
 //    %3:gpr32 = ORRWrs $wzr, %2, 0
 //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
@@ -58,12 +51,6 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   MachineLoopInfo *MLI;
   MachineRegisterInfo *MRI;
 
-  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
-                        MachineInstr *&SubregToRegMI);
-
-  template <typename T>
-  bool visitADDSUB(MachineInstr &MI,
-                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
   template <typename T>
   bool visitAND(MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -144,9 +131,36 @@ bool AArch64MIPeepholeOpt::visitAND(
   assert((RegSize == 32 || RegSize == 64) &&
          "Invalid RegSize for AND bitmask peephole optimization");
 
-  // Perform several essential checks against current MI.
-  MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr;
-  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+  // Check whether AND's MBB is in loop and the AND is loop invariant.
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineLoop *L = MLI->getLoopFor(MBB);
+  if (L && !L->isLoopInvariant(MI))
+    return false;
+
+  // Check whether AND's operand is MOV with immediate.
+  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+  if (!MovMI)
+    return false;
+
+  MachineInstr *SubregToRegMI = nullptr;
+  // If it is SUBREG_TO_REG, check its operand.
+  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
+    SubregToRegMI = MovMI;
+    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
+    if (!MovMI)
+      return false;
+  }
+
+  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
+      MovMI->getOpcode() != AArch64::MOVi64imm)
+    return false;
+
+  // If the MOV has multiple uses, do not split the immediate because it causes
+  // more instructions.
+  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
+    return false;
+
+  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
     return false;
 
   // Split the bitmask immediate into two.
@@ -163,7 +177,6 @@ bool AArch64MIPeepholeOpt::visitAND(
 
   // Create new AND MIs.
   DebugLoc DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
   const TargetRegisterClass *ANDImmRC =
       (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
   Register DstReg = MI.getOperand(0).getReg();
@@ -238,144 +251,6 @@ bool AArch64MIPeepholeOpt::visitORR(
   return true;
 }
 
-template <typename T>
-static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
-  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
-  // imm0 and imm1 are non-zero 12-bit unsigned int.
-  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
-      (Imm & ~static_cast<T>(0xffffff)) != 0)
-    return false;
-
-  // The immediate can not be composed via a single instruction.
-  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
-  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
-  if (Insn.size() == 1)
-    return false;
-
-  // Split Imm into (Imm0 << 12) + Imm1;
-  Imm0 = (Imm >> 12) & 0xfff;
-  Imm1 = Imm & 0xfff;
-  return true;
-}
-
-template <typename T>
-bool AArch64MIPeepholeOpt::visitADDSUB(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
-    bool IsAdd) {
-  // Try below transformation.
-  //
-  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
-  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
-  //
-  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
-  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
-  //
-  // The mov pseudo instruction could be expanded to multiple mov instructions
-  // later. Let's try to split the constant operand of mov instruction into two
-  // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
-  // multiple `mov` + `and/sub` instructions.
-
-  unsigned RegSize = sizeof(T) * 8;
-  assert((RegSize == 32 || RegSize == 64) &&
-         "Invalid RegSize for legal add/sub immediate peephole optimization");
-
-  // Perform several essential checks against current MI.
-  MachineInstr *MovMI, *SubregToRegMI;
-  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
-    return false;
-
-  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
-  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
-  unsigned Opcode;
-  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
-    if (IsAdd)
-      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
-    else
-      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
-  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
-    if (IsAdd)
-      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
-    else
-      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
-  } else {
-    return false;
-  }
-
-  // Create new ADD/SUB MIs.
-  DebugLoc DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
-  const TargetRegisterClass *RC =
-      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(RC);
-  Register NewDstReg = MRI->createVirtualRegister(RC);
-
-  MRI->constrainRegClass(SrcReg, RC);
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
-      .addReg(SrcReg)
-      .addImm(Imm0)
-      .addImm(12);
-
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
-      .addReg(NewTmpReg)
-      .addImm(Imm1)
-      .addImm(0);
-
-  MRI->replaceRegWith(DstReg, NewDstReg);
-  // replaceRegWith changes MI's definition register. Keep it for SSA form until
-  // deleting MI.
-  MI.getOperand(0).setReg(DstReg);
-
-  // Record the MIs need to be removed.
-  ToBeRemoved.insert(&MI);
-  if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
-
-  return true;
-}
-
-// Checks if the corresponding MOV immediate instruction is applicable for
-// this peephole optimization.
-bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
-                                            MachineInstr *&MovMI,
-                                            MachineInstr *&SubregToRegMI) {
-  // Check whether current MBB is in loop and the AND is loop invariant.
-  MachineBasicBlock *MBB = MI.getParent();
-  MachineLoop *L = MLI->getLoopFor(MBB);
-  if (L && !L->isLoopInvariant(MI))
-    return false;
-
-  // Check whether current MI's operand is MOV with immediate.
-  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
-  if (!MovMI)
-    return false;
-
-  // If it is SUBREG_TO_REG, check its operand.
-  SubregToRegMI = nullptr;
-  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
-    SubregToRegMI = MovMI;
-    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
-    if (!MovMI)
-      return false;
-  }
-
-  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
-      MovMI->getOpcode() != AArch64::MOVi64imm)
-    return false;
-
-  // If the MOV has multiple uses, do not split the immediate because it causes
-  // more instructions.
-  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
-    return false;
-  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
-    return false;
-
-  // It is OK to perform this peephole optimization.
-  return true;
-}
-
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -403,18 +278,6 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       case AArch64::ORRWrs:
         Changed = visitORR(MI, ToBeRemoved);
         break;
-      case AArch64::ADDWrr:
-        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
-        break;
-      case AArch64::SUBWrr:
-        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
-        break;
-      case AArch64::ADDXrr:
-        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
-        break;
-      case AArch64::SUBXrr:
-        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
-        break;
       }
     }
   }

diff  --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 37c9e4c5c6fe1..f0857fe2d9660 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -152,8 +152,9 @@ define void @sub_med() {
 define i64 @add_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    add x0, x8, #1365
+; CHECK-NEXT:    mov w8, #42325
+; CHECK-NEXT:    movk w8, #170, lsl #16
+; CHECK-NEXT:    add x0, x0, x8
 ; CHECK-NEXT:    ret
   %b = add i64 %a, 11183445
   ret i64 %b
@@ -162,8 +163,9 @@ define i64 @add_two_parts_imm_i64(i64 %a) {
 define i32 @add_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    add w0, w8, #1365
+; CHECK-NEXT:    mov w8, #42325
+; CHECK-NEXT:    movk w8, #170, lsl #16
+; CHECK-NEXT:    add w0, w0, w8
 ; CHECK-NEXT:    ret
   %b = add i32 %a, 11183445
   ret i32 %b
@@ -172,8 +174,9 @@ define i32 @add_two_parts_imm_i32(i32 %a) {
 define i64 @add_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    sub x0, x8, #1365
+; CHECK-NEXT:    mov x8, #-42325
+; CHECK-NEXT:    movk x8, #65365, lsl #16
+; CHECK-NEXT:    add x0, x0, x8
 ; CHECK-NEXT:    ret
   %b = add i64 %a, -11183445
   ret i64 %b
@@ -182,8 +185,9 @@ define i64 @add_two_parts_imm_i64_neg(i64 %a) {
 define i32 @add_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    sub w0, w8, #1365
+; CHECK-NEXT:    mov w8, #23211
+; CHECK-NEXT:    movk w8, #65365, lsl #16
+; CHECK-NEXT:    add w0, w0, w8
 ; CHECK-NEXT:    ret
   %b = add i32 %a, -11183445
   ret i32 %b
@@ -192,8 +196,9 @@ define i32 @add_two_parts_imm_i32_neg(i32 %a) {
 define i64 @sub_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    sub x0, x8, #1365
+; CHECK-NEXT:    mov x8, #-42325
+; CHECK-NEXT:    movk x8, #65365, lsl #16
+; CHECK-NEXT:    add x0, x0, x8
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, 11183445
   ret i64 %b
@@ -202,8 +207,9 @@ define i64 @sub_two_parts_imm_i64(i64 %a) {
 define i32 @sub_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    sub w0, w8, #1365
+; CHECK-NEXT:    mov w8, #23211
+; CHECK-NEXT:    movk w8, #65365, lsl #16
+; CHECK-NEXT:    add w0, w0, w8
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, 11183445
   ret i32 %b
@@ -212,8 +218,9 @@ define i32 @sub_two_parts_imm_i32(i32 %a) {
 define i64 @sub_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    add x0, x8, #1365
+; CHECK-NEXT:    mov w8, #42325
+; CHECK-NEXT:    movk w8, #170, lsl #16
+; CHECK-NEXT:    add x0, x0, x8
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, -11183445
   ret i64 %b
@@ -222,57 +229,14 @@ define i64 @sub_two_parts_imm_i64_neg(i64 %a) {
 define i32 @sub_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
-; CHECK-NEXT:    add w0, w8, #1365
-; CHECK-NEXT:    ret
-  %b = sub i32 %a, -11183445
-  ret i32 %b
-}
-
-define i32 @add_27962026(i32 %a) {
-; CHECK-LABEL: add_27962026:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #43690
-; CHECK-NEXT:    movk w8, #426, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
-; CHECK-NEXT:    ret
-  %b = add i32 %a, 27962026
-  ret i32 %b
-}
-
-define i32 @add_65534(i32 %a) {
-; CHECK-LABEL: add_65534:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65534
+; CHECK-NEXT:    mov w8, #42325
+; CHECK-NEXT:    movk w8, #170, lsl #16
 ; CHECK-NEXT:    add w0, w0, w8
 ; CHECK-NEXT:    ret
-  %b = add i32 %a, 65534
+  %b = sub i32 %a, -11183445
   ret i32 %b
 }
 
-declare i32 @foox(i32)
-
-define void @add_in_loop(i32 %0) {
-; CHECK-LABEL: add_in_loop:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    mov w19, #43690
-; CHECK-NEXT:    movk w19, #170, lsl #16
-; CHECK-NEXT:  .LBB15_1: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    add w0, w0, w19
-; CHECK-NEXT:    bl foox
-; CHECK-NEXT:    b .LBB15_1
-  br label %2
-2:
-  %3 = phi i32 [ %0, %1 ], [ %5, %2 ]
-  %4 = add nsw i32 %3, 11184810
-  %5 = tail call i32 @foox(i32 %4) #2
-  br label %2
-}
-
 define void @testing() {
 ; CHECK-LABEL: testing:
 ; CHECK:       // %bb.0:
@@ -280,7 +244,7 @@ define void @testing() {
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var_i32]
 ; CHECK-NEXT:    ldr w9, [x8]
 ; CHECK-NEXT:    cmp w9, #4095
-; CHECK-NEXT:    b.ne .LBB16_6
+; CHECK-NEXT:    b.ne .LBB13_6
 ; CHECK-NEXT:  // %bb.1: // %test2
 ; CHECK-NEXT:    adrp x10, :got:var2_i32
 ; CHECK-NEXT:    add w11, w9, #1
@@ -288,26 +252,26 @@ define void @testing() {
 ; CHECK-NEXT:    str w11, [x8]
 ; CHECK-NEXT:    ldr w10, [x10]
 ; CHECK-NEXT:    cmp w10, #3567, lsl #12 // =14610432
-; CHECK-NEXT:    b.lo .LBB16_6
+; CHECK-NEXT:    b.lo .LBB13_6
 ; CHECK-NEXT:  // %bb.2: // %test3
 ; CHECK-NEXT:    add w11, w9, #2
 ; CHECK-NEXT:    cmp w9, #123
 ; CHECK-NEXT:    str w11, [x8]
-; CHECK-NEXT:    b.lt .LBB16_6
+; CHECK-NEXT:    b.lt .LBB13_6
 ; CHECK-NEXT:  // %bb.3: // %test4
 ; CHECK-NEXT:    add w11, w9, #3
 ; CHECK-NEXT:    cmp w10, #321
 ; CHECK-NEXT:    str w11, [x8]
-; CHECK-NEXT:    b.gt .LBB16_6
+; CHECK-NEXT:    b.gt .LBB13_6
 ; CHECK-NEXT:  // %bb.4: // %test5
 ; CHECK-NEXT:    add w11, w9, #4
 ; CHECK-NEXT:    cmn w10, #443
 ; CHECK-NEXT:    str w11, [x8]
-; CHECK-NEXT:    b.ge .LBB16_6
+; CHECK-NEXT:    b.ge .LBB13_6
 ; CHECK-NEXT:  // %bb.5: // %test6
 ; CHECK-NEXT:    add w9, w9, #5
 ; CHECK-NEXT:    str w9, [x8]
-; CHECK-NEXT:  .LBB16_6: // %common.ret
+; CHECK-NEXT:  .LBB13_6: // %common.ret
 ; CHECK-NEXT:    ret
   %val = load i32, i32* @var_i32
   %val2 = load i32, i32* @var2_i32

diff  --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
index 97e877211b120..1c587080f4b68 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -214,9 +214,10 @@ define void @test5([65536 x i32]** %s, i32 %n) {
 ; CHECK-LABEL: test5:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr x9, [x0]
+; CHECK-NEXT:    mov w10, #14464
+; CHECK-NEXT:    movk w10, #1, lsl #16
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    add x9, x9, #19, lsl #12 // =77824
-; CHECK-NEXT:    add x9, x9, #2176
+; CHECK-NEXT:    add x9, x9, x10
 ; CHECK-NEXT:    cmp w8, w1
 ; CHECK-NEXT:    b.ge .LBB4_2
 ; CHECK-NEXT:  .LBB4_1: // %while_body