[llvm] [GlobalISel] Import GEP flags (PR #93850)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Thu May 30 13:42:30 PDT 2024


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/93850

>From f724e88457e696bd6bab2fc19439ee30edd5afcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 30 May 2024 09:06:17 +0200
Subject: [PATCH 1/3] [GlobalISel] Import GEP flags

https://github.com/llvm/llvm-project/pull/90824
---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |    6 +-
 llvm/lib/CodeGen/MachineInstr.cpp             |    5 +
 .../AArch64/GlobalISel/arm64-irtranslator.ll  | 4889 ++++++++++++++---
 3 files changed, 4269 insertions(+), 631 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 5289b993476db..a93856257dfba 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1583,10 +1583,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
 
   uint32_t Flags = 0;
-  if (isa<Instruction>(U)) {
-    const Instruction &I = cast<Instruction>(U);
-    Flags = MachineInstr::copyFlagsFromInstruction(I);
-  }
+  if (const Instruction *I = dyn_cast<Instruction>(&U))
+    Flags = MachineInstr::copyFlagsFromInstruction(*I);
 
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 02479f31f0b69..b3c0abe4688eb 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -576,6 +576,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
       MIFlags |= MachineInstr::MIFlag::NoSWrap;
     if (TI->hasNoUnsignedWrap())
       MIFlags |= MachineInstr::MIFlag::NoUWrap;
+  } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    if (GEP->hasNoUnsignedSignedWrap())
+      MIFlags |= MachineInstr::MIFlag::NoSWrap;
+    if (GEP->hasNoUnsignedWrap())
+      MIFlags |= MachineInstr::MIFlag::NoUWrap;
   }
 
   // Copy the nonneg flag.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index a61931b898aea..28c4965d647d7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -O0 -aarch64-enable-atomic-cfg-tidy=0 -mattr=+lse -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
 ; RUN: llc -O3 -aarch64-enable-atomic-cfg-tidy=0 -mattr=+lse -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=O3
 
@@ -14,6 +15,25 @@ target triple = "aarch64--"
 ; CHECK-NEXT: $x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 define i64 @addi64(i64 %arg1, i64 %arg2) {
+  ; CHECK-LABEL: name: addi64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x0 = COPY [[ADD]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: addi64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $x0 = COPY [[ADD]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = add i64 %arg1, %arg2
   ret i64 %res
 }
@@ -25,6 +45,25 @@ define i64 @addi64(i64 %arg1, i64 %arg2) {
 ; CHECK-NEXT: $x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 define i64 @muli64(i64 %arg1, i64 %arg2) {
+  ; CHECK-LABEL: name: muli64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x0 = COPY [[MUL]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: muli64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $x0 = COPY [[MUL]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = mul i64 %arg1, %arg2
   ret i64 %res
 }
@@ -47,6 +86,21 @@ define i64 @muli64(i64 %arg1, i64 %arg2) {
 ; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.2.ptr3
 ; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.3.ptr4
 define void @allocai64() {
+  ; CHECK-LABEL: name: allocai64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr1
+  ; CHECK-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.ptr2
+  ; CHECK-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.ptr3
+  ; CHECK-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.ptr4
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: allocai64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr1
+  ; O3-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.ptr2
+  ; O3-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.ptr3
+  ; O3-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.ptr4
+  ; O3-NEXT:   RET_ReallyLR
   %ptr1 = alloca i64
   %ptr2 = alloca i64, align 1
   %ptr3 = alloca i64, i32 16
@@ -75,6 +129,23 @@ define void @allocai64() {
 ; CHECK-NEXT: successors: %[[END]](0x80000000)
 ; CHECK: G_BR %[[END]]
 define void @uncondbr() {
+  ; CHECK-LABEL: name: uncondbr
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.end:
+  ; CHECK-NEXT:   RET_ReallyLR
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.bb2:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   G_BR %bb.2
+  ;
+  ; O3-LABEL: name: uncondbr
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   RET_ReallyLR
 entry:
   br label %bb2
 end:
@@ -90,6 +161,21 @@ bb2:
 ; CHECK: [[END]].{{[a-zA-Z0-9.]+}}:
 ; CHECK-NEXT: RET_ReallyLR
 define void @uncondbr_fallthrough() {
+  ; CHECK-LABEL: name: uncondbr_fallthrough
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.end:
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: uncondbr_fallthrough
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.end:
+  ; O3-NEXT:   RET_ReallyLR
 entry:
   br label %end
 end:
@@ -119,6 +205,37 @@ end:
 ; CHECK: [[FALSE]].{{[a-zA-Z0-9.]+}}:
 ; CHECK-NEXT: RET_ReallyLR
 define void @condbr(ptr %tstaddr) {
+  ; CHECK-LABEL: name: condbr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load (s1) from %ir.tstaddr)
+  ; CHECK-NEXT:   G_BRCOND [[LOAD]](s1), %bb.2
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.true:
+  ; CHECK-NEXT:   RET_ReallyLR
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.false:
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: condbr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load (s1) from %ir.tstaddr)
+  ; O3-NEXT:   G_BRCOND [[LOAD]](s1), %bb.2
+  ; O3-NEXT:   G_BR %bb.3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.true:
+  ; O3-NEXT:   RET_ReallyLR
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.false:
+  ; O3-NEXT:   RET_ReallyLR
   %tst = load i1, ptr %tstaddr
   br i1 %tst, label %true, label %false
 true:
@@ -149,6 +266,58 @@ false:
 @indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@indirectbr, %L1), ptr blockaddress(@indirectbr, %L2), ptr null], align 8
 
 define void @indirectbr() {
+  ; CHECK-LABEL: name: indirectbr
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @indirectbr.L
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.L1 (ir-block-address-taken %ir-block.L1):
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1, %2(s32), %bb.2
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI]], [[C]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PHI]](s32)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C2]]
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64)
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.arrayidx)
+  ; CHECK-NEXT:   G_BRINDIRECT [[LOAD]](p0)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.L2 (ir-block-address-taken %ir-block.L2):
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: indirectbr
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.3(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @indirectbr.L
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   G_BR %bb.3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.L1 (ir-block-address-taken %ir-block.L1):
+  ; O3-NEXT:   successors: %bb.3(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3..split:
+  ; O3-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI %2(s32), %bb.2, [[C1]](s32), %bb.1
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI]], [[C]]
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PHI]](s32)
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C2]]
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64)
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (invariant load (p0) from %ir.arrayidx)
+  ; O3-NEXT:   G_BRINDIRECT [[LOAD]](p0)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.4.L2 (ir-block-address-taken %ir-block.L2):
+  ; O3-NEXT:   RET_ReallyLR
 entry:
   br label %L1
 L1:                                               ; preds = %entry, %L1
@@ -170,6 +339,25 @@ L2:                                               ; preds = %L1
 ; CHECK-NEXT: $x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 define i64 @ori64(i64 %arg1, i64 %arg2) {
+  ; CHECK-LABEL: name: ori64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x0 = COPY [[OR]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: ori64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $x0 = COPY [[OR]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = or i64 %arg1, %arg2
   ret i64 %res
 }
@@ -181,6 +369,25 @@ define i64 @ori64(i64 %arg1, i64 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @ori32(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: ori32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[OR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: ori32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[OR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = or i32 %arg1, %arg2
   ret i32 %res
 }
@@ -193,6 +400,25 @@ define i32 @ori32(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 define i64 @xori64(i64 %arg1, i64 %arg2) {
+  ; CHECK-LABEL: name: xori64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x0 = COPY [[XOR]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: xori64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $x0 = COPY [[XOR]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = xor i64 %arg1, %arg2
   ret i64 %res
 }
@@ -204,6 +430,25 @@ define i64 @xori64(i64 %arg1, i64 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @xori32(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: xori32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[XOR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: xori32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[XOR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = xor i32 %arg1, %arg2
   ret i32 %res
 }
@@ -216,6 +461,25 @@ define i32 @xori32(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 define i64 @andi64(i64 %arg1, i64 %arg2) {
+  ; CHECK-LABEL: name: andi64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x0 = COPY [[AND]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: andi64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $x0 = COPY [[AND]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = and i64 %arg1, %arg2
   ret i64 %res
 }
@@ -227,6 +491,25 @@ define i64 @andi64(i64 %arg1, i64 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @andi32(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: andi32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[AND]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: andi32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[AND]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = and i32 %arg1, %arg2
   ret i32 %res
 }
@@ -239,6 +522,25 @@ define i32 @andi32(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $x0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $x0
 define i64 @subi64(i64 %arg1, i64 %arg2) {
+  ; CHECK-LABEL: name: subi64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $x0 = COPY [[SUB]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: subi64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   [[SUB:%[0-9]+]]:_(s64) = G_SUB [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $x0 = COPY [[SUB]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = sub i64 %arg1, %arg2
   ret i64 %res
 }
@@ -250,6 +552,25 @@ define i64 @subi64(i64 %arg1, i64 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @subi32(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: subi32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[SUB]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: subi32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[SUB]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = sub i32 %arg1, %arg2
   ret i32 %res
 }
@@ -260,6 +581,23 @@ define i32 @subi32(i32 %arg1, i32 %arg2) {
 ; CHECK: $x0 = COPY [[RES]]
 ; CHECK: RET_ReallyLR implicit $x0
 define i64 @ptrtoint(ptr %a) {
+  ; CHECK-LABEL: name: ptrtoint
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0)
+  ; CHECK-NEXT:   $x0 = COPY [[PTRTOINT]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: ptrtoint
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0)
+  ; O3-NEXT:   $x0 = COPY [[PTRTOINT]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %val = ptrtoint ptr %a to i64
   ret i64 %val
 }
@@ -270,6 +608,23 @@ define i64 @ptrtoint(ptr %a) {
 ; CHECK: $x0 = COPY [[RES]]
 ; CHECK: RET_ReallyLR implicit $x0
 define ptr @inttoptr(i64 %a) {
+  ; CHECK-LABEL: name: inttoptr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64)
+  ; CHECK-NEXT:   $x0 = COPY [[INTTOPTR]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: inttoptr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64)
+  ; O3-NEXT:   $x0 = COPY [[INTTOPTR]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %val = inttoptr i64 %a to ptr
   ret ptr %val
 }
@@ -279,6 +634,21 @@ define ptr @inttoptr(i64 %a) {
 ; CHECK: $x0 = COPY [[ARG1]]
 ; CHECK: RET_ReallyLR implicit $x0
 define ptr @trivial_bitcast(ptr %a) {
+  ; CHECK-LABEL: name: trivial_bitcast
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   $x0 = COPY [[COPY]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: trivial_bitcast
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   $x0 = COPY [[COPY]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret ptr %a
 }
 
@@ -292,6 +662,39 @@ define ptr @trivial_bitcast(ptr %a) {
 ; CHECK: [[CAST]].{{[a-zA-Z0-9.]+}}:
 ; CHECK:     G_BR %[[END]]
 define i64 @trivial_bitcast_with_copy(double %a) {
+  ; CHECK-LABEL: name: trivial_bitcast_with_copy
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.end:
+  ; CHECK-NEXT:   $x0 = COPY [[COPY]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.cast:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   G_BR %bb.2
+  ;
+  ; O3-LABEL: name: trivial_bitcast_with_copy
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.3(0x80000000)
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; O3-NEXT:   G_BR %bb.3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.end:
+  ; O3-NEXT:   $x0 = COPY [[COPY]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.cast:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   G_BR %bb.2
   br label %cast
 
 end:
@@ -309,6 +712,25 @@ cast:
 ; CHECK: $x0 = COPY [[RES2]]
 ; CHECK: RET_ReallyLR implicit $x0
 define i64 @bitcast(i64 %a) {
+  ; CHECK-LABEL: name: bitcast
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64)
+  ; CHECK-NEXT:   [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]](<2 x s32>)
+  ; CHECK-NEXT:   $x0 = COPY [[BITCAST1]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: bitcast
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64)
+  ; O3-NEXT:   [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]](<2 x s32>)
+  ; O3-NEXT:   $x0 = COPY [[BITCAST1]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res1 = bitcast i64 %a to <2 x i32>
   %res2 = bitcast <2 x i32> %res1 to i64
   ret i64 %res2
@@ -321,6 +743,25 @@ define i64 @bitcast(i64 %a) {
 ; CHECK: $x0 = COPY [[RES2]]
 ; CHECK: RET_ReallyLR implicit $x0
 define ptr @addrspacecast(ptr addrspace(1) %a) {
+  ; CHECK-LABEL: name: addrspacecast
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+  ; CHECK-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p2) = G_ADDRSPACE_CAST [[COPY]](p1)
+  ; CHECK-NEXT:   [[ADDRSPACE_CAST1:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[ADDRSPACE_CAST]](p2)
+  ; CHECK-NEXT:   $x0 = COPY [[ADDRSPACE_CAST1]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: addrspacecast
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+  ; O3-NEXT:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p2) = G_ADDRSPACE_CAST [[COPY]](p1)
+  ; O3-NEXT:   [[ADDRSPACE_CAST1:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[ADDRSPACE_CAST]](p2)
+  ; O3-NEXT:   $x0 = COPY [[ADDRSPACE_CAST1]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res1 = addrspacecast ptr addrspace(1) %a to ptr addrspace(2)
   %res2 = addrspacecast ptr addrspace(2) %res1 to ptr
   ret ptr %res2
@@ -332,6 +773,27 @@ define ptr @addrspacecast(ptr addrspace(1) %a) {
 ; CHECK: [[RES1:%[0-9]+]]:_(s8) = G_TRUNC [[ARG1]]
 ; CHECK: [[RES2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[VEC]]
 define void @trunc(i64 %a) {
+  ; CHECK-LABEL: name: trunc
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.vecptr
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (<4 x s32>) from %ir.vecptr)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[LOAD]](<4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: trunc
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.vecptr
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (<4 x s32>) from %ir.vecptr)
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[LOAD]](<4 x s32>)
+  ; O3-NEXT:   RET_ReallyLR
   %vecptr = alloca <4 x i32>
   %vec = load <4 x i32>, ptr %vecptr
   %res1 = trunc i64 %a to i8
@@ -352,6 +814,37 @@ define void @trunc(i64 %a) {
 ; CHECK: $x0 = COPY [[SUM4]]
 ; CHECK: RET_ReallyLR implicit $x0
 define i64 @load(ptr %addr, ptr addrspace(42) %addr42) {
+  ; CHECK-LABEL: name: load
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p42) = COPY $x1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.addr, align 16)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p42) :: (load (s64) from %ir.addr42, addrspace 42)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (volatile load (s64) from %ir.addr)
+  ; CHECK-NEXT:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[LOAD2]]
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.addr, !range !0)
+  ; CHECK-NEXT:   [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[LOAD3]]
+  ; CHECK-NEXT:   $x0 = COPY [[ADD2]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: load
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p42) = COPY $x1
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.addr, align 16)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p42) :: (load (s64) from %ir.addr42, addrspace 42)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]]
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (volatile load (s64) from %ir.addr)
+  ; O3-NEXT:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ADD]], [[LOAD2]]
+  ; O3-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.addr, !range !0)
+  ; O3-NEXT:   [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[ADD1]], [[LOAD3]]
+  ; O3-NEXT:   $x0 = COPY [[ADD2]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %val1 = load i64, ptr %addr, align 16
 
   %val2 = load i64, ptr addrspace(42) %addr42
@@ -375,6 +868,33 @@ define i64 @load(ptr %addr, ptr addrspace(42) %addr42) {
 ; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store (s64) into %ir.addr)
 ; CHECK: RET_ReallyLR
 define void @store(ptr %addr, ptr addrspace(42) %addr42, i64 %val1, i64 %val2) {
+  ; CHECK-LABEL: name: store
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p42) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+  ; CHECK-NEXT:   G_STORE [[COPY2]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr, align 16)
+  ; CHECK-NEXT:   G_STORE [[COPY3]](s64), [[COPY1]](p42) :: (store (s64) into %ir.addr42, addrspace 42)
+  ; CHECK-NEXT:   G_STORE [[COPY2]](s64), [[COPY]](p0) :: (volatile store (s64) into %ir.addr)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY2]], [[COPY3]]
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: store
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2, $x3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p42) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+  ; O3-NEXT:   G_STORE [[COPY2]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr, align 16)
+  ; O3-NEXT:   G_STORE [[COPY3]](s64), [[COPY1]](p42) :: (store (s64) into %ir.addr42, addrspace 42)
+  ; O3-NEXT:   G_STORE [[COPY2]](s64), [[COPY]](p0) :: (volatile store (s64) into %ir.addr)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY2]], [[COPY3]]
+  ; O3-NEXT:   RET_ReallyLR
   store i64 %val1, ptr %addr, align 16
   store i64 %val2, ptr addrspace(42) %addr42
   store volatile i64 %val1, ptr %addr
@@ -394,6 +914,29 @@ declare ptr @llvm.returnaddress(i32)
 declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr)
 define void @intrinsics(i32 %cur, i32 %bits) {
+  ; CHECK-LABEL: name: intrinsics
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (<8 x s8>) from %ir.ptr.vec)
+  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[LOAD]](<8 x s8>), [[LOAD]](<8 x s8>), [[INT]](p0) :: (store (<2 x s64>) into %ir.ptr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: intrinsics
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[INT:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+  ; O3-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (<8 x s8>) from %ir.ptr.vec)
+  ; O3-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[LOAD]](<8 x s8>), [[LOAD]](<8 x s8>), [[INT]](p0) :: (store (<2 x s64>) into %ir.ptr)
+  ; O3-NEXT:   RET_ReallyLR
   %ptr = call ptr @llvm.returnaddress(i32 0)
   %ptr.vec = alloca <8 x i8>
   %vec = load <8 x i8>, ptr %ptr.vec
@@ -414,6 +957,66 @@ define void @intrinsics(i32 %cur, i32 %bits) {
 ; CHECK:     [[RES:%[0-9]+]]:_(s32) = G_PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]]
 ; CHECK:     $w0 = COPY [[RES]]
 define i32 @test_phi(ptr %addr1, ptr %addr2, i1 %tst) {
+  ; CHECK-LABEL: name: test_phi
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $w2, $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   G_BRCOND [[TRUNC1]](s1), %bb.2
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.true:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.addr1)
+  ; CHECK-NEXT:   G_BR %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.false:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.addr2)
+  ; CHECK-NEXT:   G_BR %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.end:
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.2, [[LOAD1]](s32), %bb.3
+  ; CHECK-NEXT:   $w0 = COPY [[PHI]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_phi
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; O3-NEXT:   liveins: $w2, $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   G_BRCOND [[TRUNC1]](s1), %bb.2
+  ; O3-NEXT:   G_BR %bb.3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.true:
+  ; O3-NEXT:   successors: %bb.4(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.addr1)
+  ; O3-NEXT:   G_BR %bb.4
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.false:
+  ; O3-NEXT:   successors: %bb.4(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.addr2)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.4.end:
+  ; O3-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.2, [[LOAD1]](s32), %bb.3
+  ; O3-NEXT:   $w0 = COPY [[PHI]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   br i1 %tst, label %true, label %false
 
 true:
@@ -434,6 +1037,19 @@ end:
 ; CHECK-NEXT: {{^$}}
 ; CHECK-NEXT: ...
 define void @unreachable(i32 %a) {
+  ; CHECK-LABEL: name: unreachable
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
+  ;
+  ; O3-LABEL: name: unreachable
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
   %sum = add i32 %a, %a
   unreachable
 }
@@ -451,6 +1067,36 @@ define void @unreachable(i32 %a) {
 ; CHECK: $w0 = COPY [[RES]]
 
 define i32 @constant_int(i32 %in) {
+  ; CHECK-LABEL: name: constant_int
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.next:
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+  ; CHECK-NEXT:   [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+  ; CHECK-NEXT:   [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[ADD1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD2]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: constant_int
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.next:
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+  ; O3-NEXT:   [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD1]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   br label %next
 
 next:
@@ -465,6 +1111,21 @@ next:
 ; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
 define i32 @constant_int_start() {
+  ; CHECK-LABEL: name: constant_int_start
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+  ; CHECK-NEXT:   $w0 = COPY [[C2]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: constant_int_start
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
+  ; O3-NEXT:   $w0 = COPY [[C2]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = add i32 2, 42
   ret i32 %res
 }
@@ -473,6 +1134,17 @@ define i32 @constant_int_start() {
 ; CHECK: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; CHECK: $w0 = COPY [[UNDEF]]
 define i32 @test_undef() {
+  ; CHECK-LABEL: name: test_undef
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   $w0 = COPY [[DEF]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_undef
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; O3-NEXT:   $w0 = COPY [[DEF]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   ret i32 undef
 }
 
@@ -481,6 +1153,19 @@ define i32 @test_undef() {
 ; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ONE]]
 ; CHECK: $x0 = COPY [[PTR]]
 define ptr @test_constant_inttoptr() {
+  ; CHECK-LABEL: name: test_constant_inttoptr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s64)
+  ; CHECK-NEXT:   $x0 = COPY [[INTTOPTR]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_constant_inttoptr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s64)
+  ; O3-NEXT:   $x0 = COPY [[INTTOPTR]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret ptr inttoptr(i64 1 to ptr)
 }
 
@@ -490,6 +1175,17 @@ define ptr @test_constant_inttoptr() {
 ; CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
 ; CHECK: $x0 = COPY [[ONE]]
 define i64 @test_reused_constant() {
+  ; CHECK-LABEL: name: test_reused_constant
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   $x0 = COPY [[C]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_reused_constant
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   $x0 = COPY [[C]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret i64 1
 }
 
@@ -498,6 +1194,23 @@ define i64 @test_reused_constant() {
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_SEXT [[IN]]
 ; CHECK: $x0 = COPY [[RES]]
 define i64 @test_sext(i32 %in) {
+  ; CHECK-LABEL: name: test_sext
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
+  ; CHECK-NEXT:   $x0 = COPY [[SEXT]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_sext
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
+  ; O3-NEXT:   $x0 = COPY [[SEXT]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = sext i32 %in to i64
   ret i64 %res
 }
@@ -507,6 +1220,23 @@ define i64 @test_sext(i32 %in) {
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ZEXT [[IN]]
 ; CHECK: $x0 = COPY [[RES]]
 define i64 @test_zext(i32 %in) {
+  ; CHECK-LABEL: name: test_zext
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+  ; CHECK-NEXT:   $x0 = COPY [[ZEXT]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_zext
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+  ; O3-NEXT:   $x0 = COPY [[ZEXT]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = zext i32 %in to i64
   ret i64 %res
 }
@@ -518,6 +1248,25 @@ define i64 @test_zext(i32 %in) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_shl(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_shl
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[SHL]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_shl
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32)
+  ; O3-NEXT:   $w0 = COPY [[SHL]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = shl i32 %arg1, %arg2
   ret i32 %res
 }
@@ -530,6 +1279,25 @@ define i32 @test_shl(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_lshr(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_lshr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[LSHR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_lshr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[COPY1]](s32)
+  ; O3-NEXT:   $w0 = COPY [[LSHR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = lshr i32 %arg1, %arg2
   ret i32 %res
 }
@@ -541,6 +1309,25 @@ define i32 @test_lshr(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_ashr(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_ashr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[ASHR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_ashr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]](s32)
+  ; O3-NEXT:   $w0 = COPY [[ASHR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = ashr i32 %arg1, %arg2
   ret i32 %res
 }
@@ -552,6 +1339,25 @@ define i32 @test_ashr(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_sdiv(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_sdiv
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[SDIV]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_sdiv
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[SDIV]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = sdiv i32 %arg1, %arg2
   ret i32 %res
 }
@@ -563,6 +1369,25 @@ define i32 @test_sdiv(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_udiv(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_udiv
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[UDIV]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_udiv
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[UDIV]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = udiv i32 %arg1, %arg2
   ret i32 %res
 }
@@ -574,6 +1399,25 @@ define i32 @test_udiv(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_srem(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_srem
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[SREM]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_srem
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[SREM]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = srem i32 %arg1, %arg2
   ret i32 %res
 }
@@ -585,6 +1429,25 @@ define i32 @test_srem(i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: $w0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $w0
 define i32 @test_urem(i32 %arg1, i32 %arg2) {
+  ; CHECK-LABEL: name: test_urem
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[UREM]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_urem
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[UREM]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = urem i32 %arg1, %arg2
   ret i32 %res
 }
@@ -593,6 +1456,17 @@ define i32 @test_urem(i32 %arg1, i32 %arg2) {
 ; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
 ; CHECK: $x0 = COPY [[NULL]]
 define ptr @test_constant_null() {
+  ; CHECK-LABEL: name: test_constant_null
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   $x0 = COPY [[C]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_constant_null
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+  ; O3-NEXT:   $x0 = COPY [[C]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret ptr null
 }
 
@@ -606,6 +1480,33 @@ define ptr @test_constant_null() {
 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ADDR]], [[CST1]](s64)
 ; CHECK: G_STORE [[VAL2]](s32), [[GEP2]](p0) :: (store (s32) into %ir.addr + 4)
 define void @test_struct_memops(ptr %addr) {
+  ; CHECK-LABEL: name: test_struct_memops
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.addr + 4)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into %ir.addr + 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_struct_memops
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.addr + 4)
+  ; O3-NEXT:   G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr, align 4)
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   G_STORE [[LOAD1]](s32), [[COPY1]](p0) :: (store (s32) into %ir.addr + 4)
+  ; O3-NEXT:   RET_ReallyLR
   %val = load { i8, i32 }, ptr %addr
   store { i8, i32 } %val, ptr %addr
   ret void
@@ -616,6 +1517,23 @@ define void @test_struct_memops(ptr %addr) {
 ; CHECK: [[VAL:%[0-9]+]]:_(s1) = G_LOAD [[ADDR]](p0) :: (load (s1) from  %ir.addr)
 ; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store (s1) into  %ir.addr)
 define void @test_i1_memops(ptr %addr) {
+  ; CHECK-LABEL: name: test_i1_memops
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load (s1) from %ir.addr)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s1), [[COPY]](p0) :: (store (s1) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_i1_memops
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[COPY]](p0) :: (load (s1) from %ir.addr)
+  ; O3-NEXT:   G_STORE [[LOAD]](s1), [[COPY]](p0) :: (store (s1) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %val = load i1, ptr %addr
   store i1 %val, ptr %addr
   ret void
@@ -628,6 +1546,27 @@ define void @test_i1_memops(ptr %addr) {
 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LHS]](s32), [[RHS]]
 ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0)
 define void @int_comparison(i32 %a, i32 %b, ptr %addr) {
+  ; CHECK-LABEL: name: int_comparison
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[ICMP]](s1), [[COPY2]](p0) :: (store (s1) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: int_comparison
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+  ; O3-NEXT:   G_STORE [[ICMP]](s1), [[COPY2]](p0) :: (store (s1) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %res = icmp ne i32 %a, %b
   store i1 %res, ptr %addr
   ret void
@@ -640,6 +1579,27 @@ define void @int_comparison(i32 %a, i32 %b, ptr %addr) {
 ; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LHS]](p0), [[RHS]]
 ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0)
 define void @ptr_comparison(ptr %a, ptr %b, ptr %addr) {
+  ; CHECK-LABEL: name: ptr_comparison
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[ICMP]](s1), [[COPY2]](p0) :: (store (s1) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: ptr_comparison
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]]
+  ; O3-NEXT:   G_STORE [[ICMP]](s1), [[COPY2]](p0) :: (store (s1) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %res = icmp eq ptr %a, %b
   store i1 %res, ptr %addr
   ret void
@@ -652,6 +1612,25 @@ define void @ptr_comparison(ptr %a, ptr %b, ptr %addr) {
 ; CHECK-NEXT: $s0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $s0
 define float @test_fadd(float %arg1, float %arg2) {
+  ; CHECK-LABEL: name: test_fadd
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $s0 = COPY [[FADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fadd
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $s0 = COPY [[FADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = fadd float %arg1, %arg2
   ret float %res
 }
@@ -663,6 +1642,25 @@ define float @test_fadd(float %arg1, float %arg2) {
 ; CHECK-NEXT: $s0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $s0
 define float @test_fsub(float %arg1, float %arg2) {
+  ; CHECK-LABEL: name: test_fsub
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $s0 = COPY [[FSUB]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fsub
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $s0 = COPY [[FSUB]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = fsub float %arg1, %arg2
   ret float %res
 }
@@ -674,6 +1672,25 @@ define float @test_fsub(float %arg1, float %arg2) {
 ; CHECK-NEXT: $s0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $s0
 define float @test_fmul(float %arg1, float %arg2) {
+  ; CHECK-LABEL: name: test_fmul
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $s0 = COPY [[FMUL]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fmul
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $s0 = COPY [[FMUL]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = fmul float %arg1, %arg2
   ret float %res
 }
@@ -685,6 +1702,25 @@ define float @test_fmul(float %arg1, float %arg2) {
 ; CHECK-NEXT: $s0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $s0
 define float @test_fdiv(float %arg1, float %arg2) {
+  ; CHECK-LABEL: name: test_fdiv
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $s0 = COPY [[FDIV]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fdiv
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $s0 = COPY [[FDIV]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = fdiv float %arg1, %arg2
   ret float %res
 }
@@ -696,6 +1732,25 @@ define float @test_fdiv(float %arg1, float %arg2) {
 ; CHECK-NEXT: $s0 = COPY [[RES]]
 ; CHECK-NEXT: RET_ReallyLR implicit $s0
 define float @test_frem(float %arg1, float %arg2) {
+  ; CHECK-LABEL: name: test_frem
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FREM:%[0-9]+]]:_(s32) = G_FREM [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $s0 = COPY [[FREM]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_frem
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FREM:%[0-9]+]]:_(s32) = G_FREM [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $s0 = COPY [[FREM]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = frem float %arg1, %arg2
   ret float %res
 }
@@ -711,6 +1766,33 @@ define float @test_frem(float %arg1, float %arg2) {
 ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
 define void @test_sadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
+  ; CHECK-LABEL: name: test_sadd_overflow
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[SADDO:%[0-9]+]]:_(s32), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[SADDO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[SADDO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_sadd_overflow
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[SADDO:%[0-9]+]]:_(s32), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO [[COPY]], [[COPY1]]
+  ; O3-NEXT:   G_STORE [[SADDO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[SADDO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; O3-NEXT:   RET_ReallyLR
   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs)
   store { i32, i1 } %res, ptr %addr
   ret void
@@ -727,6 +1809,33 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
 ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 define void @test_uadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
+  ; CHECK-LABEL: name: test_uadd_overflow
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[UADDO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[UADDO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_uadd_overflow
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]]
+  ; O3-NEXT:   G_STORE [[UADDO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[UADDO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; O3-NEXT:   RET_ReallyLR
   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs)
   store { i32, i1 } %res, ptr %addr
   ret void
@@ -743,6 +1852,33 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
 ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4)
 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
 define void @test_ssub_overflow(i32 %lhs, i32 %rhs, ptr %subr) {
+  ; CHECK-LABEL: name: test_ssub_overflow
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[SSUBO:%[0-9]+]]:_(s32), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[SSUBO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.subr)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[SSUBO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.subr + 4, align 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_ssub_overflow
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[SSUBO:%[0-9]+]]:_(s32), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO [[COPY]], [[COPY1]]
+  ; O3-NEXT:   G_STORE [[SSUBO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.subr)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[SSUBO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.subr + 4, align 4)
+  ; O3-NEXT:   RET_ReallyLR
   %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs)
   store { i32, i1 } %res, ptr %subr
   ret void
@@ -759,6 +1895,33 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, ptr %subr) {
 ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.subr + 4, align 4)
 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
 define void @test_usub_overflow(i32 %lhs, i32 %rhs, ptr %subr) {
+  ; CHECK-LABEL: name: test_usub_overflow
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[USUBO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.subr)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[USUBO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.subr + 4, align 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_usub_overflow
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]]
+  ; O3-NEXT:   G_STORE [[USUBO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.subr)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[USUBO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.subr + 4, align 4)
+  ; O3-NEXT:   RET_ReallyLR
   %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs)
   store { i32, i1 } %res, ptr %subr
   ret void
@@ -775,6 +1938,33 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, ptr %subr) {
 ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
 define void @test_smul_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
+  ; CHECK-LABEL: name: test_smul_overflow
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[SMULO:%[0-9]+]]:_(s32), [[SMULO1:%[0-9]+]]:_(s1) = G_SMULO [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[SMULO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[SMULO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_smul_overflow
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[SMULO:%[0-9]+]]:_(s32), [[SMULO1:%[0-9]+]]:_(s1) = G_SMULO [[COPY]], [[COPY1]]
+  ; O3-NEXT:   G_STORE [[SMULO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[SMULO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; O3-NEXT:   RET_ReallyLR
   %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs)
   store { i32, i1 } %res, ptr %addr
   ret void
@@ -791,6 +1981,33 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
 ; CHECK: G_STORE [[OVERFLOW]](s1), [[GEP]](p0) :: (store (s1) into %ir.addr + 4, align 4)
 declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 define void @test_umul_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
+  ; CHECK-LABEL: name: test_umul_overflow
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[UMULO:%[0-9]+]]:_(s32), [[UMULO1:%[0-9]+]]:_(s1) = G_UMULO [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   G_STORE [[UMULO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[UMULO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_umul_overflow
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[UMULO:%[0-9]+]]:_(s32), [[UMULO1:%[0-9]+]]:_(s1) = G_UMULO [[COPY]], [[COPY1]]
+  ; O3-NEXT:   G_STORE [[UMULO]](s32), [[COPY2]](p0) :: (store (s32) into %ir.addr)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[UMULO1]](s1), [[PTR_ADD]](p0) :: (store (s1) into %ir.addr + 4, align 4)
+  ; O3-NEXT:   RET_ReallyLR
   %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs)
   store { i32, i1 } %res, ptr %addr
   ret void
@@ -811,6 +2028,41 @@ define void @test_umul_overflow(i32 %lhs, i32 %rhs, ptr %addr) {
 ; CHECK: $w0 = COPY [[LD3]](s32)
 %struct.nested = type {i8, { i8, i32 }, i32}
 define i32 @test_extractvalue(ptr %addr) {
+  ; CHECK-LABEL: name: test_extractvalue
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.addr + 8)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 12)
+  ; CHECK-NEXT:   $w0 = COPY [[LOAD2]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_extractvalue
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.addr + 8)
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; O3-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; O3-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 12)
+  ; O3-NEXT:   $w0 = COPY [[LOAD2]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %struct = load %struct.nested, ptr %addr
   %res = extractvalue %struct.nested %struct, 1, 1
   ret i32 %res
@@ -833,6 +2085,47 @@ define i32 @test_extractvalue(ptr %addr) {
 ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD %1, [[CST1]](s64)
 ; CHECK: G_STORE [[LD3]](s32), [[GEP4]](p0) :: (store (s32) into %ir.addr2 + 4)
 define void @test_extractvalue_agg(ptr %addr, ptr %addr2) {
+  ; CHECK-LABEL: name: test_extractvalue_agg
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.addr + 8)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 12)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s8), [[COPY1]](p0) :: (store (s8) into %ir.addr2, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p0) :: (store (s32) into %ir.addr2 + 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_extractvalue_agg
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.addr + 8)
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; O3-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; O3-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 12)
+  ; O3-NEXT:   G_STORE [[LOAD1]](s8), [[COPY1]](p0) :: (store (s8) into %ir.addr2, align 4)
+  ; O3-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+  ; O3-NEXT:   G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p0) :: (store (s32) into %ir.addr2 + 4)
+  ; O3-NEXT:   RET_ReallyLR
   %struct = load %struct.nested, ptr %addr
   %res = extractvalue %struct.nested %struct, 1
   store {i8, i32} %res, ptr %addr2
@@ -845,6 +2138,25 @@ define void @test_extractvalue_agg(ptr %addr, ptr %addr2) {
 ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_TRUNC [[VAL32]]
 ; CHECK: G_STORE [[VAL]](s8), [[STRUCT]](p0)
 define void @test_trivial_extract_ptr([1 x ptr] %s, i8 %val) {
+  ; CHECK-LABEL: name: test_trivial_extract_ptr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_trivial_extract_ptr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; O3-NEXT:   G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %addr = extractvalue [1 x ptr] %s, 0
   store i8 %val, ptr %addr
   ret void
@@ -871,6 +2183,55 @@ define void @test_trivial_extract_ptr([1 x ptr] %s, i8 %val) {
 ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST3]](s64)
 ; CHECK: G_STORE [[LD4]](s32), [[GEP6]](p0) :: (store (s32) into %ir.addr + 12)
 define void @test_insertvalue(ptr %addr, i32 %val) {
+  ; CHECK-LABEL: name: test_insertvalue
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.addr + 8)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 12)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s8), [[PTR_ADD3]](p0) :: (store (s8) into %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[COPY1]](s32), [[PTR_ADD4]](p0) :: (store (s32) into %ir.addr + 8)
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.addr + 12)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_insertvalue
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.addr + 8)
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; O3-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; O3-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 12)
+  ; O3-NEXT:   G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr, align 4)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   G_STORE [[LOAD1]](s8), [[COPY2]](p0) :: (store (s8) into %ir.addr + 4, align 4)
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY [[PTR_ADD1]](p0)
+  ; O3-NEXT:   G_STORE [[COPY1]](s32), [[COPY3]](p0) :: (store (s32) into %ir.addr + 8)
+  ; O3-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY [[PTR_ADD2]](p0)
+  ; O3-NEXT:   G_STORE [[LOAD3]](s32), [[COPY4]](p0) :: (store (s32) into %ir.addr + 12)
+  ; O3-NEXT:   RET_ReallyLR
   %struct = load %struct.nested, ptr %addr
   %newstruct = insertvalue %struct.nested %struct, i32 %val, 1, 1
   store %struct.nested %newstruct, ptr %addr
@@ -878,19 +2239,45 @@ define void @test_insertvalue(ptr %addr, i32 %val) {
 }
 
 define [1 x i64] @test_trivial_insert([1 x i64] %s, i64 %val) {
-; CHECK-LABEL: name: test_trivial_insert
-; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(s64) = COPY $x1
-; CHECK: $x0 = COPY [[VAL]]
+  ; CHECK-LABEL: name: test_trivial_insert
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; CHECK-NEXT:   $x0 = COPY [[COPY1]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_trivial_insert
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+  ; O3-NEXT:   $x0 = COPY [[COPY1]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = insertvalue [1 x i64] %s, i64 %val, 0
   ret [1 x i64] %res
 }
 
 define [1 x ptr] @test_trivial_insert_ptr([1 x ptr] %s, ptr %val) {
-; CHECK-LABEL: name: test_trivial_insert_ptr
-; CHECK: [[STRUCT:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: $x0 = COPY [[VAL]]
+  ; CHECK-LABEL: name: test_trivial_insert_ptr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   $x0 = COPY [[COPY1]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_trivial_insert_ptr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   $x0 = COPY [[COPY1]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = insertvalue [1 x ptr] %s, ptr %val, 0
   ret [1 x ptr] %res
 }
@@ -919,6 +2306,61 @@ define [1 x ptr] @test_trivial_insert_ptr([1 x ptr] %s, ptr %val) {
 ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD %0, [[CST4]](s64)
 ; CHECK: G_STORE [[LD6]](s32), [[GEP7]](p0) :: (store (s32) into %ir.addr + 12)
 define void @test_insertvalue_agg(ptr %addr, ptr %addr2) {
+  ; CHECK-LABEL: name: test_insertvalue_agg
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.addr2, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.addr2 + 4)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 8)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from %ir.addr + 12)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[PTR_ADD4]](p0) :: (store (s8) into %ir.addr + 4, align 4)
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.addr + 8)
+  ; CHECK-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; CHECK-NEXT:   G_STORE [[LOAD5]](s32), [[PTR_ADD6]](p0) :: (store (s32) into %ir.addr + 12)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_insertvalue_agg
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.addr2, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.addr2 + 4)
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD3:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from %ir.addr + 4, align 4)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.addr + 8)
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; O3-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+  ; O3-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from %ir.addr + 12)
+  ; O3-NEXT:   G_STORE [[LOAD2]](s8), [[COPY]](p0) :: (store (s8) into %ir.addr, align 4)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD1]](p0)
+  ; O3-NEXT:   G_STORE [[LOAD]](s8), [[COPY2]](p0) :: (store (s8) into %ir.addr + 4, align 4)
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY [[PTR_ADD2]](p0)
+  ; O3-NEXT:   G_STORE [[LOAD1]](s32), [[COPY3]](p0) :: (store (s32) into %ir.addr + 8)
+  ; O3-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY [[PTR_ADD3]](p0)
+  ; O3-NEXT:   G_STORE [[LOAD5]](s32), [[COPY4]](p0) :: (store (s32) into %ir.addr + 12)
+  ; O3-NEXT:   RET_ReallyLR
   %smallstruct = load {i8, i32}, ptr %addr2
   %struct = load %struct.nested, ptr %addr
   %res = insertvalue %struct.nested %struct, {i8, i32} %smallstruct, 1
@@ -936,6 +2378,33 @@ define void @test_insertvalue_agg(ptr %addr, ptr %addr2) {
 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]]
 ; CHECK: $w0 = COPY [[RES]]
 define i32 @test_select(i1 %tst, i32 %lhs, i32 %rhs) {
+  ; CHECK-LABEL: name: test_select
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT:   $w0 = COPY [[SELECT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_select
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $w2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; O3-NEXT:   $w0 = COPY [[SELECT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = select i1 %tst, i32 %lhs, i32 %rhs
   ret i32 %res
 }
@@ -949,6 +2418,33 @@ define i32 @test_select(i1 %tst, i32 %lhs, i32 %rhs) {
 ; CHECK:   [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[TRUNCASSERT]]
 ; CHECK:   [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]]
 define float @test_select_flags(i1 %tst, float %lhs, float %rhs) {
+  ; CHECK-LABEL: name: test_select_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1, $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT:   $s0 = COPY [[SELECT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_select_flags
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1, $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = nnan G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; O3-NEXT:   $s0 = COPY [[SELECT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = select nnan i1 %tst, float %lhs, float %rhs
   ret float %res
 }
@@ -962,6 +2458,31 @@ define float @test_select_flags(i1 %tst, float %lhs, float %rhs) {
 ; CHECK:   [[CMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oeq), [[COPY0]](s32), [[COPY1]]
 ; CHECK:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[COPY2]], [[COPY3]]
 define float @test_select_cmp_flags(float %cmp0, float %cmp1, float %lhs, float %rhs) {
+  ; CHECK-LABEL: name: test_select_cmp_flags
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1, $s2, $s3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $s3
+  ; CHECK-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY2]], [[COPY3]]
+  ; CHECK-NEXT:   $s0 = COPY [[SELECT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_select_cmp_flags
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1, $s2, $s3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $s3
+  ; O3-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
+  ; O3-NEXT:   [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY2]], [[COPY3]]
+  ; O3-NEXT:   $s0 = COPY [[SELECT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %tst = fcmp nsz oeq float %cmp0, %cmp1
   %res = select i1 %tst, float %lhs, float %rhs
   ret float %res
@@ -977,6 +2498,33 @@ define float @test_select_cmp_flags(float %cmp0, float %cmp1, float %lhs, float
 ; CHECK: [[RES:%[0-9]+]]:_(p0) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]]
 ; CHECK: $x0 = COPY [[RES]]
 define ptr @test_select_ptr(i1 %tst, ptr %lhs, ptr %rhs) {
+  ; CHECK-LABEL: name: test_select_ptr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT:   $x0 = COPY [[SELECT]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_select_ptr
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; O3-NEXT:   $x0 = COPY [[SELECT]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = select i1 %tst, ptr %lhs, ptr %rhs
   ret ptr %res
 }
@@ -991,6 +2539,33 @@ define ptr @test_select_ptr(i1 %tst, ptr %lhs, ptr %rhs) {
 ; CHECK: [[RES:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]]
 ; CHECK: $q0 = COPY [[RES]]
 define <4 x i32> @test_select_vec(i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs) {
+  ; CHECK-LABEL: name: test_select_vec
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0, $q1, $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT:   $q0 = COPY [[SELECT]](<4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_select_vec
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $q0, $q1, $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   [[SELECT:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TRUNC1]](s1), [[COPY1]], [[COPY2]]
+  ; O3-NEXT:   $q0 = COPY [[SELECT]](<4 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   %res = select i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs
   ret <4 x i32> %res
 }
@@ -1003,6 +2578,29 @@ define <4 x i32> @test_select_vec(i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs) {
 ; CHECK: [[RES:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TST]](<4 x s1>), [[LHS]], [[RHS]]
 ; CHECK: $q0 = COPY [[RES]]
 define <4 x i32> @test_vselect_vec(<4 x i32> %tst32, <4 x i32> %lhs, <4 x i32> %rhs) {
+  ; CHECK-LABEL: name: test_vselect_vec
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0, $q1, $q2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[COPY]](<4 x s32>)
+  ; CHECK-NEXT:   [[SELECT:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TRUNC]](<4 x s1>), [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT:   $q0 = COPY [[SELECT]](<4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_vselect_vec
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $q0, $q1, $q2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[COPY]](<4 x s32>)
+  ; O3-NEXT:   [[SELECT:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TRUNC]](<4 x s1>), [[COPY1]], [[COPY2]]
+  ; O3-NEXT:   $q0 = COPY [[SELECT]](<4 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   %tst = trunc <4 x i32> %tst32 to <4 x i1>
   %res = select <4 x i1> %tst, <4 x i32> %lhs, <4 x i32> %rhs
   ret <4 x i32> %res
@@ -1014,6 +2612,25 @@ define <4 x i32> @test_vselect_vec(<4 x i32> %tst32, <4 x i32> %lhs, <4 x i32> %
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOSI [[FP]](s32)
 ; CHECK: $x0 = COPY [[RES]]
 define i64 @test_fptosi(ptr %fp.addr) {
+  ; CHECK-LABEL: name: test_fptosi
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.fp.addr)
+  ; CHECK-NEXT:   [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI [[LOAD]](s32)
+  ; CHECK-NEXT:   $x0 = COPY [[FPTOSI]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_fptosi
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.fp.addr)
+  ; O3-NEXT:   [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI [[LOAD]](s32)
+  ; O3-NEXT:   $x0 = COPY [[FPTOSI]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %fp = load float, ptr %fp.addr
   %res = fptosi float %fp to i64
   ret i64 %res
@@ -1025,6 +2642,25 @@ define i64 @test_fptosi(ptr %fp.addr) {
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOUI [[FP]](s32)
 ; CHECK: $x0 = COPY [[RES]]
 define i64 @test_fptoui(ptr %fp.addr) {
+  ; CHECK-LABEL: name: test_fptoui
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.fp.addr)
+  ; CHECK-NEXT:   [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI [[LOAD]](s32)
+  ; CHECK-NEXT:   $x0 = COPY [[FPTOUI]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_fptoui
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.fp.addr)
+  ; O3-NEXT:   [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI [[LOAD]](s32)
+  ; O3-NEXT:   $x0 = COPY [[FPTOUI]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %fp = load float, ptr %fp.addr
   %res = fptoui float %fp to i64
   ret i64 %res
@@ -1036,6 +2672,25 @@ define i64 @test_fptoui(ptr %fp.addr) {
 ; CHECK: [[FP:%[0-9]+]]:_(s64) = G_SITOFP [[IN]](s32)
 ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0)
 define void @test_sitofp(ptr %addr, i32 %in) {
+  ; CHECK-LABEL: name: test_sitofp
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY1]](s32)
+  ; CHECK-NEXT:   G_STORE [[SITOFP]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_sitofp
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY1]](s32)
+  ; O3-NEXT:   G_STORE [[SITOFP]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %fp = sitofp i32 %in to double
   store double %fp, ptr %addr
   ret void
@@ -1047,6 +2702,25 @@ define void @test_sitofp(ptr %addr, i32 %in) {
 ; CHECK: [[FP:%[0-9]+]]:_(s64) = G_UITOFP [[IN]](s32)
 ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0)
 define void @test_uitofp(ptr %addr, i32 %in) {
+  ; CHECK-LABEL: name: test_uitofp
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY1]](s32)
+  ; CHECK-NEXT:   G_STORE [[UITOFP]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_uitofp
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY1]](s32)
+  ; O3-NEXT:   G_STORE [[UITOFP]](s64), [[COPY]](p0) :: (store (s64) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %fp = uitofp i32 %in to double
   store double %fp, ptr %addr
   ret void
@@ -1057,6 +2731,23 @@ define void @test_uitofp(ptr %addr, i32 %in) {
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPEXT [[IN]](s32)
 ; CHECK: $d0 = COPY [[RES]]
 define double @test_fpext(float %in) {
+  ; CHECK-LABEL: name: test_fpext
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s32)
+  ; CHECK-NEXT:   $d0 = COPY [[FPEXT]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_fpext
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s32)
+  ; O3-NEXT:   $d0 = COPY [[FPEXT]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %res = fpext float %in to double
   ret double %res
 }
@@ -1066,6 +2757,23 @@ define double @test_fpext(float %in) {
 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FPTRUNC [[IN]](s64)
 ; CHECK: $s0 = COPY [[RES]]
 define float @test_fptrunc(double %in) {
+  ; CHECK-LABEL: name: test_fptrunc
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[COPY]](s64)
+  ; CHECK-NEXT:   $s0 = COPY [[FPTRUNC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fptrunc
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; O3-NEXT:   [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[COPY]](s64)
+  ; O3-NEXT:   $s0 = COPY [[FPTRUNC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = fptrunc double %in to float
   ret float %res
 }
@@ -1075,6 +2783,23 @@ define float @test_fptrunc(double %in) {
 ; CHECK: [[TMP:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+00
 ; CHECK: G_STORE [[TMP]](s32), [[ADDR]](p0)
 define void @test_constant_float(ptr %addr) {
+  ; CHECK-LABEL: name: test_constant_float
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+00
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_constant_float
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+00
+  ; O3-NEXT:   G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   store float 1.5, ptr %addr
   ret void
 }
@@ -1088,6 +2813,31 @@ define void @test_constant_float(ptr %addr) {
 ; CHECK: [[TST:%[0-9]+]]:_(s1) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(oge), [[LHS]](s32), [[RHS]]
 ; CHECK: G_STORE [[TST]](s1), [[BOOLADDR]](p0)
 define void @float_comparison(ptr %a.addr, ptr %b.addr, ptr %bool.addr) {
+  ; CHECK-LABEL: name: float_comparison
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.a.addr)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.b.addr)
+  ; CHECK-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(oge), [[LOAD]](s32), [[LOAD1]]
+  ; CHECK-NEXT:   G_STORE [[FCMP]](s1), [[COPY2]](p0) :: (store (s1) into %ir.bool.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: float_comparison
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.a.addr)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.b.addr)
+  ; O3-NEXT:   [[FCMP:%[0-9]+]]:_(s1) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(oge), [[LOAD]](s32), [[LOAD1]]
+  ; O3-NEXT:   G_STORE [[FCMP]](s1), [[COPY2]](p0) :: (store (s1) into %ir.bool.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %a = load float, ptr %a.addr
   %b = load float, ptr %b.addr
   %res = fcmp nnan ninf nsz arcp contract afn reassoc oge float %a, %b
@@ -1102,6 +2852,37 @@ define void @float_comparison(ptr %a.addr, ptr %b.addr, ptr %bool.addr) {
 ; CHECK: [[R2:%[0-9]+]]:_(s1) = COPY [[ENTRY_R2]](s1)
 ; CHECK: G_ADD [[R1]], [[R2]]
 define i1 @trivial_float_comparison(double %a, double %b) {
+  ; CHECK-LABEL: name: trivial_float_comparison
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $d1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s1) = COPY [[C]](s1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s1) = COPY [[C1]](s1)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s1) = G_ADD [[COPY2]], [[COPY3]]
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[ADD]](s1)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: trivial_float_comparison
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0, $d1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $d1
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s1) = COPY [[C]](s1)
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(s1) = COPY [[C1]](s1)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s1) = G_ADD [[COPY2]], [[COPY3]]
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[ADD]](s1)
+  ; O3-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
+  ; O3-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %r1 = fcmp false double %a, %b
   %r2 = fcmp true double %a, %b
   %sum = add i1 %r1, %r2
@@ -1111,91 +2892,199 @@ define i1 @trivial_float_comparison(double %a, double %b) {
 @var = global i32 0
 
 define ptr @test_global() {
-; CHECK-LABEL: name: test_global
-; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var{{$}}
-; CHECK: $x0 = COPY [[TMP]](p0)
 
+  ; CHECK-LABEL: name: test_global
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
+  ; CHECK-NEXT:   $x0 = COPY [[GV]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_global
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var
+  ; O3-NEXT:   $x0 = COPY [[GV]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret ptr @var
 }
 
 @var1 = addrspace(42) global i32 0
 define ptr addrspace(42) @test_global_addrspace() {
-; CHECK-LABEL: name: test_global
-; CHECK: [[TMP:%[0-9]+]]:_(p42) = G_GLOBAL_VALUE @var1{{$}}
-; CHECK: $x0 = COPY [[TMP]](p42)
 
+  ; CHECK-LABEL: name: test_global_addrspace
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p42) = G_GLOBAL_VALUE @var1
+  ; CHECK-NEXT:   $x0 = COPY [[GV]](p42)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_global_addrspace
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[GV:%[0-9]+]]:_(p42) = G_GLOBAL_VALUE @var1
+  ; O3-NEXT:   $x0 = COPY [[GV]](p42)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret ptr addrspace(42) @var1
 }
 
 
 define ptr @test_global_func() {
-; CHECK-LABEL: name: test_global_func
-; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @allocai64{{$}}
-; CHECK: $x0 = COPY [[TMP]](p0)
 
+  ; CHECK-LABEL: name: test_global_func
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @allocai64
+  ; CHECK-NEXT:   $x0 = COPY [[GV]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_global_func
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @allocai64
+  ; O3-NEXT:   $x0 = COPY [[GV]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   ret ptr @allocai64
 }
 
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 define void @test_memcpy(ptr %dst, ptr %src, i64 %size) {
-; CHECK-LABEL: name: test_memcpy
-; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
-; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; CHECK-LABEL: name: test_memcpy
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_memcpy
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; O3-NEXT:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 0)
   ret void
 }
 
 define void @test_memcpy_tail(ptr %dst, ptr %src, i64 %size) {
-; CHECK-LABEL: name: test_memcpy_tail
-; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
-; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; CHECK-LABEL: name: test_memcpy_tail
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_memcpy_tail
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; O3-NEXT:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; O3-NEXT:   RET_ReallyLR
   tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 0)
   ret void
 }
 
 declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1), ptr addrspace(1), i64, i1)
 define void @test_memcpy_nonzero_as(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %size) {
-; CHECK-LABEL: name: test_memcpy_nonzero_as
-; CHECK: [[DST:%[0-9]+]]:_(p1) = COPY $x0
-; CHECK: [[SRC:%[0-9]+]]:_(p1) = COPY $x1
-; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
-; CHECK: G_MEMCPY [[DST]](p1), [[SRC]](p1), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-LABEL: name: test_memcpy_nonzero_as
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   G_MEMCPY [[COPY]](p1), [[COPY1]](p1), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 1)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_memcpy_nonzero_as
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p1) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; O3-NEXT:   G_MEMCPY [[COPY]](p1), [[COPY1]](p1), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 1)
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %size, i1 0)
   ret void
 }
 
 declare void @llvm.memmove.p0.p0.i64(ptr, ptr, i64, i1)
 define void @test_memmove(ptr %dst, ptr %src, i64 %size) {
-; CHECK-LABEL: name: test_memmove
-; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
-; CHECK: G_MEMMOVE [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; CHECK-LABEL: name: test_memmove
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_memmove
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; O3-NEXT:   G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst), (load (s8) from %ir.src)
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %size, i1 0)
   ret void
 }
 
 declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
 define void @test_memset(ptr %dst, i8 %val, i64 %size) {
-; CHECK-LABEL: name: test_memset
-; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY $w1
-; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]]
-; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2
-; CHECK: G_MEMSET [[DST]](p0), [[SRC]](s8), [[SIZE]](s64), 0 :: (store (s8) into %ir.dst)
+  ; CHECK-LABEL: name: test_memset
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK-NEXT:   G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_memset
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+  ; O3-NEXT:   G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[COPY2]](s64), 0 :: (store (s8) into %ir.dst)
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.memset.p0.i64(ptr %dst, i8 %val, i64 %size, i1 0)
   ret void
 }
 
 define void @test_large_const(ptr %addr) {
-; CHECK-LABEL: name: test_large_const
-; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
-; CHECK: G_STORE [[VAL]](s128), [[ADDR]](p0)
+  ; CHECK-LABEL: name: test_large_const
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+  ; CHECK-NEXT:   G_STORE [[C]](s128), [[COPY]](p0) :: (store (s128) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_large_const
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+  ; O3-NEXT:   G_STORE [[C]](s128), [[COPY]](p0) :: (store (s128) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   store i128 42, ptr %addr
   ret void
 }
@@ -1205,11 +3094,28 @@ define void @test_large_const(ptr %addr) {
 ; after the block's terminators had been emitted. Also make sure the order is
 ; correct.
 define ptr @test_const_placement() {
-; CHECK-LABEL: name: test_const_placement
-; CHECK: bb.{{[0-9]+}} (%ir-block.{{[0-9]+}}):
-; CHECK:   [[VAL_INT:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-; CHECK:   [[VAL:%[0-9]+]]:_(p0) = G_INTTOPTR [[VAL_INT]](s32)
-; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}:
+  ; CHECK-LABEL: name: test_const_placement
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; CHECK-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32)
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.next:
+  ; CHECK-NEXT:   $x0 = COPY [[INTTOPTR]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_const_placement
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; O3-NEXT:   [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.next:
+  ; O3-NEXT:   $x0 = COPY [[INTTOPTR]](p0)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   br label %next
 
 next:
@@ -1218,21 +3124,44 @@ next:
 
 declare void @llvm.va_end(ptr)
 define void @test_va_end(ptr %list) {
-; CHECK-LABEL: name: test_va_end
-; CHECK-NOT: va_end
-; CHECK-NOT: INTRINSIC
-; CHECK: RET_ReallyLR
+  ; CHECK-LABEL: name: test_va_end
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_va_end
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.va_end(ptr %list)
   ret void
 }
 
 define void @test_va_arg(ptr %list) {
-; CHECK-LABEL: test_va_arg
-; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: G_VAARG [[LIST]](p0), 8
-; CHECK: G_VAARG [[LIST]](p0), 1
-; CHECK: G_VAARG [[LIST]](p0), 16
 
+  ; CHECK-LABEL: name: test_va_arg
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[VAARG:%[0-9]+]]:_(s64) = G_VAARG [[COPY]](p0), 8
+  ; CHECK-NEXT:   [[VAARG1:%[0-9]+]]:_(s8) = G_VAARG [[COPY]](p0), 1
+  ; CHECK-NEXT:   [[VAARG2:%[0-9]+]]:_(s128) = G_VAARG [[COPY]](p0), 16
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_va_arg
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[VAARG:%[0-9]+]]:_(s64) = G_VAARG [[COPY]](p0), 8
+  ; O3-NEXT:   [[VAARG1:%[0-9]+]]:_(s8) = G_VAARG [[COPY]](p0), 1
+  ; O3-NEXT:   [[VAARG2:%[0-9]+]]:_(s128) = G_VAARG [[COPY]](p0), 16
+  ; O3-NEXT:   RET_ReallyLR
   %v0 = va_arg ptr %list, i64
   %v1 = va_arg ptr %list, i8
   %v2 = va_arg ptr %list, i128
@@ -1241,220 +3170,502 @@ define void @test_va_arg(ptr %list) {
 
 declare float @llvm.pow.f32(float, float)
 define float @test_pow_intrin(float %l, float %r) {
-; CHECK-LABEL: name: test_pow_intrin
-; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $s1
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOW [[LHS]], [[RHS]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_pow_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FPOW:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOW [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $s0 = COPY [[FPOW]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_pow_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FPOW:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOW [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $s0 = COPY [[FPOW]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.pow.f32(float %l, float %r)
   ret float %res
 }
 
 declare float @llvm.powi.f32.i32(float, i32)
 define float @test_powi_intrin(float %l, i32 %r) {
-; CHECK-LABEL: name: test_powi_intrin
-; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI [[LHS]], [[RHS]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_powi_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[FPOWI:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $s0 = COPY [[FPOWI]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_powi_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[FPOWI:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI [[COPY]], [[COPY1]](s32)
+  ; O3-NEXT:   $s0 = COPY [[FPOWI]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.powi.f32.i32(float %l, i32 %r)
   ret float %res
 }
 
 declare float @llvm.fma.f32(float, float, float)
 define float @test_fma_intrin(float %a, float %b, float %c) {
-; CHECK-LABEL: name: test_fma_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[B:%[0-9]+]]:_(s32) = COPY $s1
-; CHECK: [[C:%[0-9]+]]:_(s32) = COPY $s2
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA [[A]], [[B]], [[C]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_fma_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1, $s2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+  ; CHECK-NEXT:   [[FMA:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+  ; CHECK-NEXT:   $s0 = COPY [[FMA]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fma_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1, $s2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $s2
+  ; O3-NEXT:   [[FMA:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+  ; O3-NEXT:   $s0 = COPY [[FMA]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.fma.f32(float %a, float %b, float %c)
   ret float %res
 }
 
 declare float @llvm.exp.f32(float)
 define float @test_exp_intrin(float %a) {
-; CHECK-LABEL: name: test_exp_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FEXP [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_exp_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FEXP:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FEXP [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FEXP]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_exp_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FEXP:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FEXP [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FEXP]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.exp.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.exp2.f32(float)
 define float @test_exp2_intrin(float %a) {
-; CHECK-LABEL: name: test_exp2_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FEXP2 [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_exp2_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FEXP2_:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FEXP2 [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FEXP2_]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_exp2_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FEXP2_:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FEXP2 [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FEXP2_]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.exp2.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.log.f32(float)
 define float @test_log_intrin(float %a) {
-; CHECK-LABEL: name: test_log_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FLOG [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_log_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FLOG:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FLOG [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FLOG]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_log_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FLOG:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FLOG [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FLOG]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.log.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.log2.f32(float)
 define float @test_log2_intrin(float %a) {
-; CHECK-LABEL: name: test_log2_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FLOG2 [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_log2_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FLOG2_]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_log2_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FLOG2_:%[0-9]+]]:_(s32) = G_FLOG2 [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FLOG2_]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.log2.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.log10.f32(float)
 define float @test_log10_intrin(float %a) {
-; CHECK-LABEL: name: test_log10_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FLOG10 [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_log10_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FLOG10_:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FLOG10 [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FLOG10_]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_log10_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FLOG10_:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FLOG10 [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FLOG10_]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.log10.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.fabs.f32(float)
 define float @test_fabs_intrin(float %a) {
-; CHECK-LABEL: name: test_fabs_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FABS [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_fabs_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FABS:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FABS [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FABS]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fabs_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FABS:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FABS [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FABS]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.fabs.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.copysign.f32(float, float)
 define float @test_fcopysign_intrin(float %a, float %b) {
-; CHECK-LABEL: name: test_fcopysign_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[B:%[0-9]+]]:_(s32) = COPY $s1
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FCOPYSIGN [[A]], [[B]]
-; CHECK: $s0 = COPY [[RES]]
 
+  ; CHECK-LABEL: name: test_fcopysign_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0, $s1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; CHECK-NEXT:   [[FCOPYSIGN:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FCOPYSIGN [[COPY]], [[COPY1]](s32)
+  ; CHECK-NEXT:   $s0 = COPY [[FCOPYSIGN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fcopysign_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0, $s1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s1
+  ; O3-NEXT:   [[FCOPYSIGN:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FCOPYSIGN [[COPY]], [[COPY1]](s32)
+  ; O3-NEXT:   $s0 = COPY [[FCOPYSIGN]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.copysign.f32(float %a, float %b)
   ret float %res
 }
 
 declare float @llvm.canonicalize.f32(float)
 define float @test_fcanonicalize_intrin(float %a) {
-; CHECK-LABEL: name: test_fcanonicalize_intrin
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FCANONICALIZE [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_fcanonicalize_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FCANONICALIZE:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FCANONICALIZE [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FCANONICALIZE]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fcanonicalize_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FCANONICALIZE:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FCANONICALIZE [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FCANONICALIZE]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call nnan ninf nsz arcp contract afn reassoc float @llvm.canonicalize.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.trunc.f32(float)
 define float @test_intrinsic_trunc(float %a) {
-; CHECK-LABEL: name: test_intrinsic_trunc
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_intrinsic_trunc
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[INTRINSIC_TRUNC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_intrinsic_trunc
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[INTRINSIC_TRUNC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.trunc.f32(float %a)
   ret float %res
 }
 
 declare float @llvm.round.f32(float)
 define float @test_intrinsic_round(float %a) {
-; CHECK-LABEL: name: test_intrinsic_round
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[A]]
-; CHECK: $s0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_intrinsic_round
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[INTRINSIC_ROUND]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_intrinsic_round
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[INTRINSIC_ROUND]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call float @llvm.round.f32(float %a)
   ret float %res
 }
 
 declare i32 @llvm.lrint.i32.f32(float)
 define i32 @test_intrinsic_lrint(float %a) {
-; CHECK-LABEL: name: test_intrinsic_lrint
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_INTRINSIC_LRINT [[A]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_intrinsic_lrint
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[INTRINSIC_LRINT:%[0-9]+]]:_(s32) = G_INTRINSIC_LRINT [[COPY]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[INTRINSIC_LRINT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_intrinsic_lrint
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[INTRINSIC_LRINT:%[0-9]+]]:_(s32) = G_INTRINSIC_LRINT [[COPY]](s32)
+  ; O3-NEXT:   $w0 = COPY [[INTRINSIC_LRINT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.lrint.i32.f32(float %a)
   ret i32 %res
 }
 
 declare i32 @llvm.llrint.i32.f32(float)
 define i32 @test_intrinsic_llrint(float %a) {
-; CHECK-LABEL: name: test_intrinsic_llrint
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_INTRINSIC_LLRINT [[A]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_intrinsic_llrint
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[INTRINSIC_LLRINT:%[0-9]+]]:_(s32) = G_INTRINSIC_LLRINT [[COPY]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[INTRINSIC_LLRINT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_intrinsic_llrint
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[INTRINSIC_LLRINT:%[0-9]+]]:_(s32) = G_INTRINSIC_LLRINT [[COPY]](s32)
+  ; O3-NEXT:   $w0 = COPY [[INTRINSIC_LLRINT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.llrint.i32.f32(float %a)
   ret i32 %res
 }
 
 declare i32 @llvm.ctlz.i32(i32, i1)
 define i32 @test_ctlz_intrinsic_zero_not_undef(i32 %a) {
-; CHECK-LABEL: name: test_ctlz_intrinsic_zero_not_undef
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_CTLZ [[A]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_ctlz_intrinsic_zero_not_undef
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[CTLZ]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_ctlz_intrinsic_zero_not_undef
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
+  ; O3-NEXT:   $w0 = COPY [[CTLZ]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.ctlz.i32(i32 %a, i1 0)
   ret i32 %res
 }
 
 declare i32 @llvm.cttz.i32(i32, i1)
 define i32 @test_cttz_intrinsic_zero_undef(i32 %a) {
-; CHECK-LABEL: name: test_cttz_intrinsic_zero_undef
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[A]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_cttz_intrinsic_zero_undef
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[CTTZ_ZERO_UNDEF]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_cttz_intrinsic_zero_undef
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32)
+  ; O3-NEXT:   $w0 = COPY [[CTTZ_ZERO_UNDEF]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.cttz.i32(i32 %a, i1 1)
   ret i32 %res
 }
 
 declare i32 @llvm.ctpop.i32(i32)
 define i32 @test_ctpop_intrinsic(i32 %a) {
-; CHECK-LABEL: name: test_ctpop
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_CTPOP [[A]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_ctpop_intrinsic
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[CTPOP]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_ctpop_intrinsic
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[COPY]](s32)
+  ; O3-NEXT:   $w0 = COPY [[CTPOP]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %res
 }
 
 declare i32 @llvm.bitreverse.i32(i32)
 define i32 @test_bitreverse_intrinsic(i32 %a) {
-; CHECK-LABEL: name: test_bitreverse
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_BITREVERSE [[A]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_bitreverse_intrinsic
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY]]
+  ; CHECK-NEXT:   $w0 = COPY [[BITREVERSE]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_bitreverse_intrinsic
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[BITREVERSE:%[0-9]+]]:_(s32) = G_BITREVERSE [[COPY]]
+  ; O3-NEXT:   $w0 = COPY [[BITREVERSE]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.bitreverse.i32(i32 %a)
   ret i32 %res
 }
 
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 define i32 @test_fshl_intrinsic(i32 %a, i32 %b, i32 %c) {
-; CHECK-LABEL: name: test_fshl_intrinsic
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[B:%[0-9]+]]:_(s32) = COPY $w1
-; CHECK: [[C:%[0-9]+]]:_(s32) = COPY $w2
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FSHL [[A]], [[B]], [[C]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_fshl_intrinsic
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK-NEXT:   [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[COPY2]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[FSHL]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_fshl_intrinsic
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $w2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; O3-NEXT:   [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[COPY2]](s32)
+  ; O3-NEXT:   $w0 = COPY [[FSHL]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
   ret i32 %res
 }
 
 declare i32 @llvm.fshr.i32(i32, i32, i32)
 define i32 @test_fshr_intrinsic(i32 %a, i32 %b, i32 %c) {
-; CHECK-LABEL: name: test_fshr_intrinsic
-; CHECK: [[A:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[B:%[0-9]+]]:_(s32) = COPY $w1
-; CHECK: [[C:%[0-9]+]]:_(s32) = COPY $w2
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FSHR [[A]], [[B]], [[C]]
-; CHECK: $w0 = COPY [[RES]]
+  ; CHECK-LABEL: name: test_fshr_intrinsic
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0, $w1, $w2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK-NEXT:   [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
+  ; CHECK-NEXT:   $w0 = COPY [[FSHR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_fshr_intrinsic
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0, $w1, $w2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; O3-NEXT:   [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32)
+  ; O3-NEXT:   $w0 = COPY [[FSHR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
   ret i32 %res
 }
@@ -1462,14 +3673,21 @@ define i32 @test_fshr_intrinsic(i32 %a, i32 %b, i32 %c) {
 declare void @llvm.lifetime.start.p0(i64, ptr)
 declare void @llvm.lifetime.end.p0(i64, ptr)
 define void @test_lifetime_intrin() {
-; CHECK-LABEL: name: test_lifetime_intrin
-; CHECK: RET_ReallyLR
-; O3-LABEL: name: test_lifetime_intrin
-; O3: {{%[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.0.slot
-; O3-NEXT: LIFETIME_START %stack.0.slot
-; O3-NEXT: G_STORE
-; O3-NEXT: LIFETIME_END %stack.0.slot
-; O3-NEXT: RET_ReallyLR
+  ; CHECK-LABEL: name: test_lifetime_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 10
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.slot
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[FRAME_INDEX]](p0) :: (volatile store (s8) into %ir.slot)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_lifetime_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 10
+  ; O3-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.slot
+  ; O3-NEXT:   LIFETIME_START %stack.0.slot
+  ; O3-NEXT:   G_STORE [[C]](s8), [[FRAME_INDEX]](p0) :: (volatile store (s8) into %ir.slot)
+  ; O3-NEXT:   LIFETIME_END %stack.0.slot
+  ; O3-NEXT:   RET_ReallyLR
   %slot = alloca i8, i32 4
   call void @llvm.lifetime.start.p0(i64 0, ptr %slot)
   store volatile i8 10, ptr %slot
@@ -1478,14 +3696,31 @@ define void @test_lifetime_intrin() {
 }
 
 define void @test_load_store_atomics(ptr %addr) {
-; CHECK-LABEL: name: test_load_store_atomics
-; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[V0:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load unordered (s8) from %ir.addr)
-; CHECK: G_STORE [[V0]](s8), [[ADDR]](p0) :: (store monotonic (s8) into %ir.addr)
-; CHECK: [[V1:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load acquire (s8) from %ir.addr)
-; CHECK: G_STORE [[V1]](s8), [[ADDR]](p0) :: (store release (s8) into %ir.addr)
-; CHECK: [[V2:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load syncscope("singlethread") seq_cst (s8) from %ir.addr)
-; CHECK: G_STORE [[V2]](s8), [[ADDR]](p0) :: (store syncscope("singlethread") monotonic (s8) into %ir.addr)
+  ; CHECK-LABEL: name: test_load_store_atomics
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load unordered (s8) from %ir.addr)
+  ; CHECK-NEXT:   G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store monotonic (s8) into %ir.addr)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load acquire (s8) from %ir.addr)
+  ; CHECK-NEXT:   G_STORE [[LOAD1]](s8), [[COPY]](p0) :: (store release (s8) into %ir.addr)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load syncscope("singlethread") seq_cst (s8) from %ir.addr)
+  ; CHECK-NEXT:   G_STORE [[LOAD2]](s8), [[COPY]](p0) :: (store syncscope("singlethread") monotonic (s8) into %ir.addr)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_load_store_atomics
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load unordered (s8) from %ir.addr)
+  ; O3-NEXT:   G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store monotonic (s8) into %ir.addr)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load acquire (s8) from %ir.addr)
+  ; O3-NEXT:   G_STORE [[LOAD1]](s8), [[COPY]](p0) :: (store release (s8) into %ir.addr)
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load syncscope("singlethread") seq_cst (s8) from %ir.addr)
+  ; O3-NEXT:   G_STORE [[LOAD2]](s8), [[COPY]](p0) :: (store syncscope("singlethread") monotonic (s8) into %ir.addr)
+  ; O3-NEXT:   RET_ReallyLR
   %v0 = load atomic i8, ptr %addr unordered, align 1
   store atomic i8 %v0, ptr %addr monotonic, align 1
 
@@ -1499,188 +3734,423 @@ define void @test_load_store_atomics(ptr %addr) {
 }
 
 define float @test_fneg_f32(float %x) {
-; CHECK-LABEL: name: test_fneg_f32
-; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FNEG [[ARG]]
-; CHECK: $s0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_fneg_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FNEG]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fneg_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FNEG]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %neg = fneg float %x
   ret float %neg
 }
 
 define float @test_fneg_f32_fmf(float %x) {
-; CHECK-LABEL: name: test_fneg_f32
-; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[ARG]]
-; CHECK: $s0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_fneg_f32_fmf
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FNEG:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FNEG]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_fneg_f32_fmf
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FNEG:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FNEG]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %neg = fneg fast float %x
   ret float %neg
 }
 
 define double @test_fneg_f64(double %x) {
-; CHECK-LABEL: name: test_fneg_f64
-; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY $d0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FNEG [[ARG]]
-; CHECK: $d0 = COPY [[RES]](s64)
+  ; CHECK-LABEL: name: test_fneg_f64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
+  ; CHECK-NEXT:   $d0 = COPY [[FNEG]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_fneg_f64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; O3-NEXT:   [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
+  ; O3-NEXT:   $d0 = COPY [[FNEG]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %neg = fneg double %x
   ret double %neg
 }
 
 define double @test_fneg_f64_fmf(double %x) {
-; CHECK-LABEL: name: test_fneg_f64
-; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY $d0
-; CHECK: [[RES:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[ARG]]
-; CHECK: $d0 = COPY [[RES]](s64)
+  ; CHECK-LABEL: name: test_fneg_f64_fmf
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[FNEG:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[COPY]]
+  ; CHECK-NEXT:   $d0 = COPY [[FNEG]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_fneg_f64_fmf
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; O3-NEXT:   [[FNEG:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[COPY]]
+  ; O3-NEXT:   $d0 = COPY [[FNEG]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %neg = fneg fast double %x
   ret double %neg
 }
 
 define void @test_trivial_inlineasm() {
-; CHECK-LABEL: name: test_trivial_inlineasm
-; CHECK: INLINEASM &wibble, 1
-; CHECK: INLINEASM &wibble, 0
+  ; CHECK-LABEL: name: test_trivial_inlineasm
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   INLINEASM &wibble, 1 /* sideeffect attdialect */
+  ; CHECK-NEXT:   INLINEASM &wibble, 0 /* attdialect */
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_trivial_inlineasm
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   INLINEASM &wibble, 1 /* sideeffect attdialect */
+  ; O3-NEXT:   INLINEASM &wibble, 0 /* attdialect */
+  ; O3-NEXT:   RET_ReallyLR
   call void asm sideeffect "wibble", ""()
   call void asm "wibble", ""()
   ret void
 }
 
 define <2 x i32> @test_insertelement(<2 x i32> %vec, i32 %elt, i32 %idx){
-; CHECK-LABEL: name: test_insertelement
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w1
-; CHECK: [[IDX2:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]]
-; CHECK: [[RES:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[VEC]], [[ELT]](s32), [[IDX2]](s64)
-; CHECK: $d0 = COPY [[RES]](<2 x s32>)
+  ; CHECK-LABEL: name: test_insertelement
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+  ; CHECK-NEXT:   [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[ZEXT]](s64)
+  ; CHECK-NEXT:   $d0 = COPY [[IVEC]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_insertelement
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0, $w0, $w1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32)
+  ; O3-NEXT:   [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[ZEXT]](s64)
+  ; O3-NEXT:   $d0 = COPY [[IVEC]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %res = insertelement <2 x i32> %vec, i32 %elt, i32 %idx
   ret <2 x i32> %res
 }
 
 define i32 @test_extractelement(<2 x i32> %vec, i32 %idx) {
-; CHECK-LABEL: name: test_extractelement
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[IDXEXT:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]]
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDXEXT]](s64)
-; CHECK: $w0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_extractelement
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32)
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[ZEXT]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_extractelement
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0, $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY1]](s32)
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[ZEXT]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = extractelement <2 x i32> %vec, i32 %idx
   ret i32 %res
 }
 
 define i32 @test_extractelement_const_idx(<2 x i32> %vec) {
-; CHECK-LABEL: name: test_extractelement
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
-; CHECK: $w0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_extractelement_const_idx
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_extractelement_const_idx
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = extractelement <2 x i32> %vec, i32 1
   ret i32 %res
 }
 
 define i32 @test_extractelement_const_idx_zext_i1(<2 x i32> %vec) {
-; CHECK-LABEL: name: test_extractelement
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
-; CHECK: $w0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_extractelement_const_idx_zext_i1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_extractelement_const_idx_zext_i1
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = extractelement <2 x i32> %vec, i1 true
   ret i32 %res
 }
 
 define i32 @test_extractelement_const_idx_zext_i8(<2 x i32> %vec) {
-; CHECK-LABEL: name: test_extractelement
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s64)
-; CHECK: $w0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_extractelement_const_idx_zext_i8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_extractelement_const_idx_zext_i8
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %res = extractelement <2 x i32> %vec, i8 255
   ret i32 %res
 }
 
 
 define i32 @test_singleelementvector(i32 %elt){
-; CHECK-LABEL: name: test_singleelementvector
-; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK-NOT: G_INSERT_VECTOR_ELT
-; CHECK-NOT: G_EXTRACT_VECTOR_ELT
-; CHECK: $w0 = COPY [[ELT]](s32)
+  ; CHECK-LABEL: name: test_singleelementvector
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_singleelementvector
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   $w0 = COPY [[COPY]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %vec = insertelement <1 x i32> undef, i32 %elt, i32 0
   %res = extractelement <1 x i32> %vec, i32 0
   ret i32 %res
 }
 
 define <2 x i32> @test_constantaggzerovector_v2i32() {
-; CHECK-LABEL: name: test_constantaggzerovector_v2i32
-; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZERO]](s32), [[ZERO]](s32)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_constantaggzerovector_v2i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_constantaggzerovector_v2i32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+  ; O3-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   ret <2 x i32> zeroinitializer
 }
 
 define <2 x float> @test_constantaggzerovector_v2f32() {
-; CHECK-LABEL: name: test_constantaggzerovector_v2f32
-; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZERO]](s32), [[ZERO]](s32)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_constantaggzerovector_v2f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_constantaggzerovector_v2f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+  ; O3-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   ret <2 x float> zeroinitializer
 }
 
 define i32 @test_constantaggzerovector_v3i32() {
-; CHECK-LABEL: name: test_constantaggzerovector_v3i32
-; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZERO]](s32), [[ZERO]](s32), [[ZERO]](s32)
-; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>)
+  ; CHECK-LABEL: name: test_constantaggzerovector_v3i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_constantaggzerovector_v3i32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %elt = extractelement <3 x i32> zeroinitializer, i32 1
   ret i32 %elt
 }
 
 define <2 x i32> @test_constantdatavector_v2i32() {
-; CHECK-LABEL: name: test_constantdatavector_v2i32
-; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C2]](s32)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_constantdatavector_v2i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
+  ; CHECK-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_constantdatavector_v2i32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
+  ; O3-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   ret <2 x i32> <i32 1, i32 2>
 }
 
 define i32 @test_constantdatavector_v3i32() {
-; CHECK-LABEL: name: test_constantdatavector_v3i32
-; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C2]](s32), [[C3]](s32)
-; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>)
+  ; CHECK-LABEL: name: test_constantdatavector_v3i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C3]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_constantdatavector_v3i32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
+  ; O3-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C3]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %elt = extractelement <3 x i32> <i32 1, i32 2, i32 3>, i32 1
   ret i32 %elt
 }
 
 define <4 x i32> @test_constantdatavector_v4i32() {
-; CHECK-LABEL: name: test_constantdatavector_v4i32
-; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
-; CHECK: $q0 = COPY [[VEC]](<4 x s32>)
+  ; CHECK-LABEL: name: test_constantdatavector_v4i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
+  ; CHECK-NEXT:   $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_constantdatavector_v4i32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; O3-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
+  ; O3-NEXT:   $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   ret <4 x i32> <i32 1, i32 2, i32 3, i32 4>
 }
 
 define <2 x double> @test_constantdatavector_v2f64() {
-; CHECK-LABEL: name: test_constantdatavector_v2f64
-; CHECK: [[FC1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-; CHECK: [[FC2:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FC1]](s64), [[FC2]](s64)
-; CHECK: $q0 = COPY [[VEC]](<2 x s64>)
+  ; CHECK-LABEL: name: test_constantdatavector_v2f64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+  ; CHECK-NEXT:   $q0 = COPY [[BUILD_VECTOR]](<2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_constantdatavector_v2f64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
+  ; O3-NEXT:   $q0 = COPY [[BUILD_VECTOR]](<2 x s64>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   ret <2 x double> <double 1.0, double 2.0>
 }
 
 define i32 @test_constantaggzerovector_v1s32(i32 %arg){
-; CHECK-LABEL: name: test_constantaggzerovector_v1s32
-; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK-NOT: G_MERGE_VALUES
-; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C0]]
-; CHECK-NOT: G_MERGE_VALUES
-; CHECK: G_ADD [[ARG]], [[COPY]]
+  ; CHECK-LABEL: name: test_constantaggzerovector_v1s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_constantaggzerovector_v1s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
   %add = add <1 x i32> %vec, zeroinitializer
   %res = extractelement <1 x i32> %add, i32 0
@@ -1688,13 +4158,27 @@ define i32 @test_constantaggzerovector_v1s32(i32 %arg){
 }
 
 define i32 @test_constantdatavector_v1s32(i32 %arg){
-; CHECK-LABEL: name: test_constantdatavector_v1s32
-; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NOT: G_MERGE_VALUES
-; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C0]]
-; CHECK-NOT: G_MERGE_VALUES
-; CHECK: G_ADD [[ARG]], [[COPY]]
+  ; CHECK-LABEL: name: test_constantdatavector_v1s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_constantdatavector_v1s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
   %add = add <1 x i32> %vec, <i32 1>
   %res = extractelement <1 x i32> %add, i32 0
@@ -1703,103 +4187,255 @@ define i32 @test_constantdatavector_v1s32(i32 %arg){
 
 declare ghccc float @different_call_conv_target(float %x)
 define float @test_different_call_conv_target(float %x) {
-; CHECK-LABEL: name: test_different_call_conv
-; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK: $s8 = COPY [[X]]
-; CHECK: BL @different_call_conv_target, csr_aarch64_noregs, implicit-def $lr, implicit $sp, implicit $s8, implicit-def $s0
+  ; CHECK-LABEL: name: test_different_call_conv_target
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   $s8 = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   BL @different_call_conv_target, csr_aarch64_noregs, implicit-def $lr, implicit $sp, implicit $s8, implicit-def $s0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   $s0 = COPY [[COPY1]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_different_call_conv_target
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; O3-NEXT:   $s8 = COPY [[COPY]](s32)
+  ; O3-NEXT:   BL @different_call_conv_target, csr_aarch64_noregs, implicit-def $lr, implicit $sp, implicit $s8, implicit-def $s0
+  ; O3-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   $s0 = COPY [[COPY1]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %res = call ghccc float @different_call_conv_target(float %x)
   ret float %res
 }
 
 define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) {
-; CHECK-LABEL: name: test_shufflevector_s32_v2s32
-; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], shufflemask(0, 0)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_s32_v2s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](s32), [[DEF]], shufflemask(0, 0)
+  ; CHECK-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_shufflevector_s32_v2s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](s32), [[DEF]], shufflemask(0, 0)
+  ; O3-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %vec = insertelement <1 x i32> undef, i32 %arg, i32 0
   %res = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer
   ret <2 x i32> %res
 }
 
 define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) {
-; CHECK-LABEL: name: test_shufflevector_v2s32_s32
-; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(1)
-; CHECK: $w0 = COPY [[RES]](s32)
+  ; CHECK-LABEL: name: test_shufflevector_v2s32_s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1)
+  ; CHECK-NEXT:   $w0 = COPY [[SHUF]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v2s32_s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1)
+  ; O3-NEXT:   $w0 = COPY [[SHUF]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <1 x i32> <i32 1>
   %res = extractelement <1 x i32> %vec, i32 0
   ret i32 %res
 }
 
 define <2 x i32> @test_shufflevector_v2s32_v2s32_undef(<2 x i32> %arg) {
-; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32_undef
-; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(undef, undef)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32_undef
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, undef)
+  ; CHECK-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v2s32_v2s32_undef
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, undef)
+  ; O3-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %res = shufflevector <2 x i32> %arg, <2 x i32> undef, <2 x i32> undef
   ret <2 x i32> %res
 }
 
 define <2 x i32> @test_shufflevector_v2s32_v2s32_undef_0(<2 x i32> %arg) {
-; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32_undef_0
-; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(undef, 0)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32_undef_0
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0)
+  ; CHECK-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v2s32_v2s32_undef_0
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0)
+  ; O3-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %res = shufflevector <2 x i32> %arg, <2 x i32> undef, <2 x i32> <i32 undef, i32 0>
   ret <2 x i32> %res
 }
 
 define <2 x i32> @test_shufflevector_v2s32_v2s32_0_undef(<2 x i32> %arg) {
-; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32_0_undef
-; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(0, undef)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32_0_undef
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, undef)
+  ; CHECK-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v2s32_v2s32_0_undef
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, undef)
+  ; O3-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %res = shufflevector <2 x i32> %arg, <2 x i32> undef, <2 x i32> <i32 0, i32 undef>
   ret <2 x i32> %res
 }
 
 define i32 @test_shufflevector_v2s32_v3s32(<2 x i32> %arg) {
-; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32
-; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(1, 0, 1)
-; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<3 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1, 0, 1)
+  ; CHECK-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<3 x s32>), [[C]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v2s32_v3s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<3 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1, 0, 1)
+  ; O3-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<3 x s32>), [[C]](s64)
+  ; O3-NEXT:   $w0 = COPY [[EVEC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
   %res = extractelement <3 x i32> %vec, i32 0
   ret i32 %res
 }
 
 define <4 x i32> @test_shufflevector_v2s32_v4s32(<2 x i32> %arg1, <2 x i32> %arg2) {
-; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32
-; CHECK: [[ARG1:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK: [[ARG2:%[0-9]+]]:_(<2 x s32>) = COPY $d1
-; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[ARG1]](<2 x s32>), [[ARG2]], shufflemask(0, 1, 2, 3)
-; CHECK: $q0 = COPY [[VEC]](<4 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $d1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3)
+  ; CHECK-NEXT:   $q0 = COPY [[SHUF]](<4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v2s32_v4s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0, $d1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 1, 2, 3)
+  ; O3-NEXT:   $q0 = COPY [[SHUF]](<4 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   %res = shufflevector <2 x i32> %arg1, <2 x i32> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %res
 }
 
 define <2 x i32> @test_shufflevector_v4s32_v2s32(<4 x i32> %arg) {
-; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32
-; CHECK: [[ARG:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
-; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<4 x s32>), [[UNDEF]], shufflemask(1, 3)
-; CHECK: $d0 = COPY [[VEC]](<2 x s32>)
+  ; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(1, 3)
+  ; CHECK-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v4s32_v2s32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $q0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(1, 3)
+  ; O3-NEXT:   $d0 = COPY [[SHUF]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %res = shufflevector <4 x i32> %arg, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %res
 }
 
 
 define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) {
-; CHECK-LABEL: name: test_shufflevector_v8s8_v16s8
-; CHECK: [[ARG1:%[0-9]+]]:_(<8 x s8>) = COPY $d0
-; CHECK: [[ARG2:%[0-9]+]]:_(<8 x s8>) = COPY $d1
-; CHECK: [[VEC:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[ARG1]](<8 x s8>), [[ARG2]], shufflemask(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15)
-; CHECK: $q0 = COPY [[VEC]](<16 x s8>)
+  ; CHECK-LABEL: name: test_shufflevector_v8s8_v16s8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0, $d1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+  ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s8>), [[COPY1]], shufflemask(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15)
+  ; CHECK-NEXT:   $q0 = COPY [[SHUF]](<16 x s8>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_shufflevector_v8s8_v16s8
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0, $d1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
+  ; O3-NEXT:   [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s8>), [[COPY1]], shufflemask(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15)
+  ; O3-NEXT:   $q0 = COPY [[SHUF]](<16 x s8>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   %res = shufflevector <8 x i8> %arg1, <8 x i8> %arg2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   ret <16 x i8> %res
 }
@@ -1810,14 +4446,44 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2)
 ; CHECK: [[M:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UNDEF]](s16), [[UNDEF]](s16), [[UNDEF]](s16), [[F]](s16)
 ; CHECK: $d0 = COPY [[M]](<4 x s16>)
 define <4 x half> @test_constant_vector() {
+  ; CHECK-LABEL: name: test_constant_vector
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[C]](s16)
+  ; CHECK-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_constant_vector
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
+  ; O3-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[C]](s16)
+  ; O3-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   ret <4 x half> <half undef, half undef, half undef, half 0xH3C00>
 }
 
 define i32 @test_target_mem_intrinsic(ptr %addr) {
-; CHECK-LABEL: name: test_target_mem_intrinsic
-; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load (s32) from %ir.addr)
-; CHECK: G_TRUNC [[VAL]](s64)
+  ; CHECK-LABEL: name: test_target_mem_intrinsic
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[COPY]](p0) :: (volatile load (s32) from %ir.addr)
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[INT]](s64)
+  ; CHECK-NEXT:   $w0 = COPY [[TRUNC]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_target_mem_intrinsic
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[COPY]](p0) :: (volatile load (s32) from %ir.addr)
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[INT]](s64)
+  ; O3-NEXT:   $w0 = COPY [[TRUNC]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %val = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) %addr)
   %trunc = trunc i64 %val to i32
   ret i32 %trunc
@@ -1828,10 +4494,19 @@ declare i64 @llvm.aarch64.ldxr.p0(ptr) nounwind
 %zerosize_type = type {}
 
 define %zerosize_type @test_empty_load_store(ptr %ptr, %zerosize_type %in) noinline optnone {
-; CHECK-LABEL: name: test_empty_load_store
-; CHECK-NOT: G_STORE
-; CHECK-NOT: G_LOAD
-; CHECK: RET_ReallyLR
+  ; CHECK-LABEL: name: test_empty_load_store
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_empty_load_store
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   RET_ReallyLR
 entry:
   store %zerosize_type undef, ptr undef, align 4
   %val = load %zerosize_type, ptr %ptr, align 4
@@ -1840,24 +4515,64 @@ entry:
 
 
 define i64 @test_phi_loop(i32 %n) {
-; CHECK-LABEL: name: test_phi_loop
-; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $w0
-; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: [[CST2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-
-; CHECK: [[PN1:%[0-9]+]]:_(s32) = G_PHI [[ARG1]](s32), %bb.1, [[SUB:%[0-9]+]](s32), %bb.2
-; CHECK: [[PN2:%[0-9]+]]:_(s64) = G_PHI [[CST3]](s64), %bb.1, [[PN3:%[0-9]+]](s64), %bb.2
-; CHECK: [[PN3]]:_(s64) = G_PHI [[CST4]](s64), %bb.1, [[ADD:%[0-9]+]](s64), %bb.2
-; CHECK: [[ADD]]:_(s64) = G_ADD [[PN2]], [[PN3]]
-; CHECK: [[SUB]]:_(s32) = G_SUB [[PN1]], [[CST1]]
-; CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PN1]](s32), [[CST2]]
-; CHECK: G_BRCOND [[CMP]](s1), %bb.3
-; CHECK: G_BR %bb.2
-
-; CHECK: $x0 = COPY [[PN2]](s64)
-; CHECK: RET_ReallyLR implicit $x0
+
+
+  ; CHECK-LABEL: name: test_phi_loop
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.loop:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY]](s32), %bb.1, %6(s32), %bb.2
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:_(s64) = G_PHI [[C2]](s64), %bb.1, %3(s64), %bb.2
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:_(s64) = G_PHI [[C3]](s64), %bb.1, %4(s64), %bb.2
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s64) = G_ADD [[PHI1]], [[PHI2]]
+  ; CHECK-NEXT:   [[SUB:%[0-9]+]]:_(s32) = G_SUB [[PHI]], [[C]]
+  ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI]](s32), [[C1]]
+  ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.3
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.exit:
+  ; CHECK-NEXT:   $x0 = COPY [[PHI1]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_phi_loop
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; O3-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.loop:
+  ; O3-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI %8(s32), %bb.2, [[ADD]](s32), %bb.1
+  ; O3-NEXT:   [[PHI1:%[0-9]+]]:_(s64) = G_PHI [[C3]](s64), %bb.1, %5(s64), %bb.2
+  ; O3-NEXT:   [[PHI2:%[0-9]+]]:_(s64) = G_PHI [[C4]](s64), %bb.1, %6(s64), %bb.2
+  ; O3-NEXT:   [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[PHI1]], [[PHI2]]
+  ; O3-NEXT:   [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[PHI]], [[C1]]
+  ; O3-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[ADD2]](s32), [[C2]]
+  ; O3-NEXT:   G_BRCOND [[ICMP]](s1), %bb.3
+  ; O3-NEXT:   G_BR %bb.2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.exit:
+  ; O3-NEXT:   $x0 = COPY [[PHI1]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
 entry:
   br label %loop
 
@@ -1879,46 +4594,112 @@ exit:
 }
 
 define void @test_phi_diamond(ptr %a.ptr, ptr %b.ptr, i1 %selector, ptr %dst) {
-; CHECK-LABEL: name: test_phi_diamond
-; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[ARG2:%[0-9]+]]:_(p0) = COPY $x1
-; CHECK: [[ARG3:%[0-9]+]]:_(s32) = COPY $w2
-; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ARG3]]
-; CHECK: [[ARG4:%[0-9]+]]:_(p0) = COPY $x3
-; CHECK: [[TRUNCASSERT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC8]], 1
-; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[TRUNCASSERT]]
-; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2
-; CHECK: G_BR %bb.3
-
-; CHECK: [[LD1:%[0-9]+]]:_(s8) = G_LOAD [[ARG1]](p0) :: (load (s8) from %ir.a.ptr, align 4)
-; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG1]], [[CST1]](s64)
-; CHECK: [[LD2:%[0-9]+]]:_(s16) = G_LOAD [[GEP1]](p0) :: (load (s16) from %ir.a.ptr + 2)
-; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG1]], [[CST2]](s64)
-; CHECK: [[LD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p0) :: (load (s32) from %ir.a.ptr + 4)
-; CHECK: G_BR %bb.4
-
-; CHECK: [[LD4:%[0-9]+]]:_(s8) = G_LOAD [[ARG2]](p0) :: (load (s8) from %ir.b.ptr, align 4)
-; CHECK: [[CST3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG2]], [[CST3]](s64)
-; CHECK: [[LD5:%[0-9]+]]:_(s16) = G_LOAD [[GEP3]](p0) :: (load (s16) from %ir.b.ptr + 2)
-; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG2]], [[CST4]](s64)
-; CHECK: [[LD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p0) :: (load (s32) from %ir.b.ptr + 4)
-
-; CHECK: [[PN1:%[0-9]+]]:_(s8) = G_PHI [[LD1]](s8), %bb.2, [[LD4]](s8), %bb.3
-; CHECK: [[PN2:%[0-9]+]]:_(s16) = G_PHI [[LD2]](s16), %bb.2, [[LD5]](s16), %bb.3
-; CHECK: [[PN3:%[0-9]+]]:_(s32) = G_PHI [[LD3]](s32), %bb.2, [[LD6]](s32), %bb.3
-; CHECK: G_STORE [[PN1]](s8), [[ARG4]](p0) :: (store (s8) into %ir.dst, align 4)
-; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG4]], [[CST5]](s64)
-; CHECK: G_STORE [[PN2]](s16), [[GEP5]](p0) :: (store (s16) into %ir.dst + 2)
-; CHECK: [[CST6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[ARG4]], [[CST6]](s64)
-; CHECK: G_STORE [[PN3]](s32), [[GEP6]](p0) :: (store (s32) into %ir.dst + 4)
-; CHECK: RET_ReallyLR
 
+
+
+
+  ; CHECK-LABEL: name: test_phi_diamond
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $w2, $x0, $x1, $x3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   G_BRCOND [[TRUNC1]](s1), %bb.2
+  ; CHECK-NEXT:   G_BR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.store.a:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.a.ptr, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from %ir.a.ptr + 2)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.a.ptr + 4)
+  ; CHECK-NEXT:   G_BR %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.store.b:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LOAD3:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.b.ptr, align 4)
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+  ; CHECK-NEXT:   [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from %ir.b.ptr + 2)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+  ; CHECK-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from %ir.b.ptr + 4)
+  ; CHECK-NEXT:   G_BR %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.join:
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:_(s8) = G_PHI [[LOAD]](s8), %bb.2, [[LOAD3]](s8), %bb.3
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[LOAD1]](s16), %bb.2, [[LOAD4]](s16), %bb.3
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[LOAD2]](s32), %bb.2, [[LOAD5]](s32), %bb.3
+  ; CHECK-NEXT:   G_STORE [[PHI]](s8), [[COPY3]](p0) :: (store (s8) into %ir.dst, align 4)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C4]](s64)
+  ; CHECK-NEXT:   G_STORE [[PHI1]](s16), [[PTR_ADD4]](p0) :: (store (s16) into %ir.dst + 2)
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C5]](s64)
+  ; CHECK-NEXT:   G_STORE [[PHI2]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.dst + 4)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_phi_diamond
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; O3-NEXT:   liveins: $w2, $x0, $x1, $x3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY $x3
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   G_BRCOND [[TRUNC1]](s1), %bb.2
+  ; O3-NEXT:   G_BR %bb.3
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.store.a:
+  ; O3-NEXT:   successors: %bb.4(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.a.ptr, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from %ir.a.ptr + 2)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.a.ptr + 4)
+  ; O3-NEXT:   G_BR %bb.4
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.store.b:
+  ; O3-NEXT:   successors: %bb.4(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[LOAD3:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.b.ptr, align 4)
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; O3-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+  ; O3-NEXT:   [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from %ir.b.ptr + 2)
+  ; O3-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+  ; O3-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p0) :: (load (s32) from %ir.b.ptr + 4)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.4.join:
+  ; O3-NEXT:   [[PHI:%[0-9]+]]:_(s8) = G_PHI [[LOAD]](s8), %bb.2, [[LOAD3]](s8), %bb.3
+  ; O3-NEXT:   [[PHI1:%[0-9]+]]:_(s16) = G_PHI [[LOAD1]](s16), %bb.2, [[LOAD4]](s16), %bb.3
+  ; O3-NEXT:   [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[LOAD2]](s32), %bb.2, [[LOAD5]](s32), %bb.3
+  ; O3-NEXT:   G_STORE [[PHI]](s8), [[COPY3]](p0) :: (store (s8) into %ir.dst, align 4)
+  ; O3-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; O3-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C4]](s64)
+  ; O3-NEXT:   G_STORE [[PHI1]](s16), [[PTR_ADD4]](p0) :: (store (s16) into %ir.dst + 2)
+  ; O3-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C5]](s64)
+  ; O3-NEXT:   G_STORE [[PHI2]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.dst + 4)
+  ; O3-NEXT:   RET_ReallyLR
 entry:
   br i1 %selector, label %store.a, label %store.b
 
@@ -1941,63 +4722,134 @@ join:
 %agg.nested = type {i32, i32, %agg.inner, i32}
 
 define void @test_nested_aggregate_const(ptr %ptr) {
-; CHECK-LABEL: name: test_nested_aggregate_const
-; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK: [[CST1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: [[CST2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-; CHECK: [[CST3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
-; CHECK: [[CST4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-; CHECK: [[CST5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-; CHECK: [[CST6:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
-; CHECK: G_STORE [[CST1]](s32), [[BASE]](p0) :: (store (s32) into %ir.ptr, align 8)
-; CHECK: [[CST7:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST7]](s64)
-; CHECK: G_STORE [[CST1]](s32), [[GEP1]](p0) :: (store (s32) into %ir.ptr + 4)
-; CHECK: [[CST8:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST8]](s64)
-; CHECK: G_STORE [[CST2]](s16), [[GEP2]](p0) :: (store (s16) into %ir.ptr + 8, align 8)
-; CHECK: [[CST9:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
-; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST9]](s64)
-; CHECK: G_STORE [[CST3]](s8), [[GEP3]](p0) :: (store (s8) into %ir.ptr + 10, align 2)
-; CHECK: [[CST10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST10]](s64)
-; CHECK: G_STORE [[CST4]](s64), [[GEP4]](p0) :: (store (s64) into %ir.ptr + 16)
-; CHECK: [[CST11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST11]](s64)
-; CHECK: G_STORE [[CST5]](s64), [[GEP5]](p0) :: (store (s64) into %ir.ptr + 24)
-; CHECK: [[CST12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[BASE]], [[CST12]](s64)
-; CHECK: G_STORE [[CST6]](s32), [[GEP6]](p0) :: (store (s32) into %ir.ptr + 32, align 8)
+  ; CHECK-LABEL: name: test_nested_aggregate_const
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.ptr, align 8)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.ptr + 4)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+  ; CHECK-NEXT:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.ptr + 8, align 8)
+  ; CHECK-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+  ; CHECK-NEXT:   G_STORE [[C2]](s8), [[PTR_ADD2]](p0) :: (store (s8) into %ir.ptr + 10, align 2)
+  ; CHECK-NEXT:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+  ; CHECK-NEXT:   G_STORE [[C3]](s64), [[PTR_ADD3]](p0) :: (store (s64) into %ir.ptr + 16)
+  ; CHECK-NEXT:   [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; CHECK-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+  ; CHECK-NEXT:   G_STORE [[C4]](s64), [[PTR_ADD4]](p0) :: (store (s64) into %ir.ptr + 24)
+  ; CHECK-NEXT:   [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; CHECK-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+  ; CHECK-NEXT:   G_STORE [[C5]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.ptr + 32, align 8)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_nested_aggregate_const
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
+  ; O3-NEXT:   [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3
+  ; O3-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; O3-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+  ; O3-NEXT:   G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.ptr, align 8)
+  ; O3-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+  ; O3-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.ptr + 4)
+  ; O3-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+  ; O3-NEXT:   G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.ptr + 8, align 8)
+  ; O3-NEXT:   [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
+  ; O3-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+  ; O3-NEXT:   G_STORE [[C2]](s8), [[PTR_ADD2]](p0) :: (store (s8) into %ir.ptr + 10, align 2)
+  ; O3-NEXT:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; O3-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+  ; O3-NEXT:   G_STORE [[C3]](s64), [[PTR_ADD3]](p0) :: (store (s64) into %ir.ptr + 16)
+  ; O3-NEXT:   [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+  ; O3-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s64)
+  ; O3-NEXT:   G_STORE [[C4]](s64), [[PTR_ADD4]](p0) :: (store (s64) into %ir.ptr + 24)
+  ; O3-NEXT:   [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+  ; O3-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s64)
+  ; O3-NEXT:   G_STORE [[C5]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.ptr + 32, align 8)
+  ; O3-NEXT:   RET_ReallyLR
   store %agg.nested { i32 1, i32 1, %agg.inner { i16 2, i8 3, %agg.inner.inner {i64 5, i64 8} }, i32 13}, ptr %ptr
   ret void
 }
 
 define i1 @return_i1_zext() {
-; AAPCS ABI says that booleans can only be 1 or 0, so we need to zero-extend.
-; CHECK-LABEL: name: return_i1_zext
-; CHECK: [[CST:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-; CHECK: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[CST]](s1)
-; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
-; CHECK: $w0 = COPY [[ANYEXT]](s32)
-; CHECK: RET_ReallyLR implicit $w0
+  ; CHECK-LABEL: name: return_i1_zext
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: return_i1_zext
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
+  ; O3-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
+  ; O3-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   ret i1 true
 }
 
 ; Try one cmpxchg
 define i32 @test_atomic_cmpxchg_1(ptr %addr) {
-; CHECK-LABEL: name: test_atomic_cmpxchg_1
-; CHECK:       bb.1.entry:
-; CHECK-NEXT:  successors: %bb.{{[^)]+}}
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[OLDVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK-NEXT:    [[NEWVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK:       bb.2.repeat:
-; CHECK-NEXT:    successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}})
-; CHECK:         [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic (s32) on %ir.addr)
-; CHECK-NEXT:    G_BRCOND [[SUCCESS]](s1), %bb.3
-; CHECK-NEXT:    G_BR %bb.2
-; CHECK:       bb.3.done:
+  ; CHECK-LABEL: name: test_atomic_cmpxchg_1
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.repeat:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic monotonic (s32) on %ir.addr)
+  ; CHECK-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.done:
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomic_cmpxchg_1
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.repeat:
+  ; O3-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic monotonic (s32) on %ir.addr)
+  ; O3-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; O3-NEXT:   G_BR %bb.2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.done:
+  ; O3-NEXT:   $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
 entry:
   br label %repeat
 repeat:
@@ -2011,19 +4863,46 @@ done:
 
 ; Try one cmpxchg
 define i32 @test_weak_atomic_cmpxchg_1(ptr %addr) {
-; CHECK-LABEL: name: test_weak_atomic_cmpxchg_1
-; CHECK:       bb.1.entry:
-; CHECK-NEXT:  successors: %bb.{{[^)]+}}
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[OLDVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK-NEXT:    [[NEWVAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK:       bb.2.repeat:
-; CHECK-NEXT:    successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}})
-; CHECK:         [[OLDVALRES:%[0-9]+]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store monotonic monotonic (s32) on %ir.addr)
-; CHECK-NEXT:    G_BRCOND [[SUCCESS]](s1), %bb.3
-; CHECK-NEXT:    G_BR %bb.2
-; CHECK:       bb.3.done:
+  ; CHECK-LABEL: name: test_weak_atomic_cmpxchg_1
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.repeat:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic monotonic (s32) on %ir.addr)
+  ; CHECK-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.done:
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_weak_atomic_cmpxchg_1
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.repeat:
+  ; O3-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store monotonic monotonic (s32) on %ir.addr)
+  ; O3-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; O3-NEXT:   G_BR %bb.2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.done:
+  ; O3-NEXT:   $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
 entry:
   br label %repeat
 repeat:
@@ -2037,19 +4916,48 @@ done:
 
 ; Try one cmpxchg with a small type and high atomic ordering.
 define i16 @test_atomic_cmpxchg_2(ptr %addr) {
-; CHECK-LABEL: name: test_atomic_cmpxchg_2
-; CHECK:       bb.1.entry:
-; CHECK-NEXT:  successors: %bb.2({{[^)]+}})
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[OLDVAL:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
-; CHECK-NEXT:    [[NEWVAL:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-; CHECK:       bb.2.repeat:
-; CHECK-NEXT:    successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}})
-; CHECK:         [[OLDVALRES:%[0-9]+]]:_(s16), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst seq_cst (s16) on %ir.addr)
-; CHECK-NEXT:    G_BRCOND [[SUCCESS]](s1), %bb.3
-; CHECK-NEXT:    G_BR %bb.2
-; CHECK:       bb.3.done:
+  ; CHECK-LABEL: name: test_atomic_cmpxchg_2
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.repeat:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s16), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store seq_cst seq_cst (s16) on %ir.addr)
+  ; CHECK-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.done:
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s16)
+  ; CHECK-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomic_cmpxchg_2
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.repeat:
+  ; O3-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s16), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store seq_cst seq_cst (s16) on %ir.addr)
+  ; O3-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; O3-NEXT:   G_BR %bb.2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.done:
+  ; O3-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s16)
+  ; O3-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
 entry:
   br label %repeat
 repeat:
@@ -2063,19 +4971,46 @@ done:
 
 ; Try one cmpxchg where the success order and failure order differ.
 define i64 @test_atomic_cmpxchg_3(ptr %addr) {
-; CHECK-LABEL: name: test_atomic_cmpxchg_3
-; CHECK:       bb.1.entry:
-; CHECK-NEXT:  successors: %bb.2({{[^)]+}})
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[OLDVAL:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-; CHECK-NEXT:    [[NEWVAL:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-; CHECK:       bb.2.repeat:
-; CHECK-NEXT:    successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}})
-; CHECK:         [[OLDVALRES:%[0-9]+]]:_(s64), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst acquire (s64) on %ir.addr)
-; CHECK-NEXT:    G_BRCOND [[SUCCESS]](s1), %bb.3
-; CHECK-NEXT:    G_BR %bb.2
-; CHECK:       bb.3.done:
+  ; CHECK-LABEL: name: test_atomic_cmpxchg_3
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.repeat:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s64), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store seq_cst acquire (s64) on %ir.addr)
+  ; CHECK-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.done:
+  ; CHECK-NEXT:   $x0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_atomic_cmpxchg_3
+  ; O3: bb.1.entry:
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.repeat:
+  ; O3-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s64), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[C]], [[C1]] :: (load store seq_cst acquire (s64) on %ir.addr)
+  ; O3-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; O3-NEXT:   G_BR %bb.2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.done:
+  ; O3-NEXT:   $x0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
 entry:
   br label %repeat
 repeat:
@@ -2089,48 +5024,100 @@ done:
 
 ; Try a monotonic atomicrmw xchg
 define i32 @test_atomicrmw_xchg(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_xchg
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[ADDR]](p0), [[VAL]] :: (load store monotonic (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_xchg
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[C]] :: (load store monotonic (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_XCHG]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_xchg
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_XCHG:%[0-9]+]]:_(s32) = G_ATOMICRMW_XCHG [[COPY]](p0), [[C]] :: (load store monotonic (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_XCHG]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw xchg ptr %addr, i32 1 monotonic
   ret i32 %oldval
 }
 
 ; Try an acquire atomicrmw add
 define i32 @test_atomicrmw_add(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_add
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[ADDR]](p0), [[VAL]] :: (load store acquire (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_add
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store acquire (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_add
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_ADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[C]] :: (load store acquire (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw add ptr %addr, i32 1 acquire
   ret i32 %oldval
 }
 
 ; Try a release atomicrmw sub
 define i32 @test_atomicrmw_sub(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_sub
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[ADDR]](p0), [[VAL]] :: (load store release (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_sub
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p0), [[C]] :: (load store release (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_SUB]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_sub
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_SUB:%[0-9]+]]:_(s32) = G_ATOMICRMW_SUB [[COPY]](p0), [[C]] :: (load store release (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_SUB]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw sub ptr %addr, i32 1 release
   ret i32 %oldval
 }
 
 ; Try an acq_rel atomicrmw and
 define i32 @test_atomicrmw_and(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_and
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[ADDR]](p0), [[VAL]] :: (load store acq_rel (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_and
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p0), [[C]] :: (load store acq_rel (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_AND]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_and
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_AND:%[0-9]+]]:_(s32) = G_ATOMICRMW_AND [[COPY]](p0), [[C]] :: (load store acq_rel (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_AND]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw and ptr %addr, i32 1 acq_rel
   ret i32 %oldval
 }
@@ -2138,95 +5125,204 @@ define i32 @test_atomicrmw_and(ptr %addr) {
 ; Try an seq_cst atomicrmw nand. NAND isn't supported by LSE, so it
 ; expands to G_ATOMIC_CMPXCHG_WITH_SUCCESS.
 define i32 @test_atomicrmw_nand(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_nand
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  successors: %bb.2(0x80000000)
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[NEG1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
-; CHECK-NEXT:    [[OLDVALSTART:%[0-9]+]]:_(s32) = G_LOAD [[ADDR]](p0) :: (load (s32) from %ir.addr)
-; CHECK:       bb.2.atomicrmw.start:
-; CHECK-NEXT:    successors: %bb.3({{[^)]+}}), %bb.2({{[^)]+}})
-; CHECK:         [[OLDVAL:%[0-9]+]]:_(s32) = G_PHI [[OLDVALSTART]](s32), %bb.1, [[OLDVALRES:%[0-9]+]](s32), %bb.2
-; CHECK-NEXT:    [[AND:%[0-9]+]]:_(s32) = G_AND [[OLDVAL]], [[VAL]]
-; CHECK-NEXT:    [[NEWVAL:%[0-9]+]]:_(s32) = G_XOR [[AND]], [[NEG1]]
-; CHECK:         [[OLDVALRES]]:_(s32), [[SUCCESS:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[ADDR]](p0), [[OLDVAL]], [[NEWVAL]] :: (load store seq_cst seq_cst (s32) on %ir.addr)
-; CHECK-NEXT:    G_BRCOND [[SUCCESS]](s1), %bb.3
-; CHECK-NEXT:    G_BR %bb.2
-; CHECK:       bb.3.atomicrmw.end:
+  ; CHECK-LABEL: name: test_atomicrmw_nand
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.addr)
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.atomicrmw.start:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2
+  ; CHECK-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[PHI]], [[C]]
+  ; CHECK-NEXT:   [[XOR:%[0-9]+]]:_(s32) = G_XOR [[AND]], [[C1]]
+  ; CHECK-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[PHI]], [[XOR]] :: (load store seq_cst seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.atomicrmw.end:
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_nand
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.addr)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.atomicrmw.start:
+  ; O3-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[PHI:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2
+  ; O3-NEXT:   [[AND:%[0-9]+]]:_(s32) = G_AND [[PHI]], [[C]]
+  ; O3-NEXT:   [[XOR:%[0-9]+]]:_(s32) = G_XOR [[AND]], [[C1]]
+  ; O3-NEXT:   [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p0), [[PHI]], [[XOR]] :: (load store seq_cst seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   G_BRCOND [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), %bb.3
+  ; O3-NEXT:   G_BR %bb.2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.3.atomicrmw.end:
+  ; O3-NEXT:   $w0 = COPY [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw nand ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
 
 ; Try an seq_cst atomicrmw or
 define i32 @test_atomicrmw_or(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_or
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_or
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_OR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_or
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_OR:%[0-9]+]]:_(s32) = G_ATOMICRMW_OR [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_OR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw or ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
 
 ; Try an seq_cst atomicrmw xor
 define i32 @test_atomicrmw_xor(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_xor
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_xor
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_XOR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_xor
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_XOR:%[0-9]+]]:_(s32) = G_ATOMICRMW_XOR [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_XOR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw xor ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
 
 ; Try an seq_cst atomicrmw min
 define i32 @test_atomicrmw_min(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_min
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_min
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_MIN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_min
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_MIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_MIN [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_MIN]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw min ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
 
 ; Try an seq_cst atomicrmw max
 define i32 @test_atomicrmw_max(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_max
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_max
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_MAX]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_max
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_MAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_MAX [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_MAX]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw max ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
 
 ; Try an seq_cst atomicrmw unsigned min
 define i32 @test_atomicrmw_umin(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_umin
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_umin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_UMIN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_umin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_UMIN:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMIN [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_UMIN]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw umin ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
 
 ; Try an seq_cst atomicrmw unsigned max
 define i32 @test_atomicrmw_umax(ptr %addr) {
-; CHECK-LABEL: name: test_atomicrmw_umax
-; CHECK:       bb.1 (%ir-block.{{[0-9]+}}):
-; CHECK-NEXT:  liveins: $x0
-; CHECK:         [[ADDR:%[0-9]+]]:_(p0) = COPY $x0
-; CHECK-NEXT:    [[VAL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK-NEXT:    [[OLDVALRES:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[ADDR]](p0), [[VAL]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-LABEL: name: test_atomicrmw_umax
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK-NEXT:   [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; CHECK-NEXT:   $w0 = COPY [[ATOMICRMW_UMAX]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: test_atomicrmw_umax
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; O3-NEXT:   [[ATOMICRMW_UMAX:%[0-9]+]]:_(s32) = G_ATOMICRMW_UMAX [[COPY]](p0), [[C]] :: (load store seq_cst (s32) on %ir.addr)
+  ; O3-NEXT:   $w0 = COPY [[ATOMICRMW_UMAX]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
   %oldval = atomicrmw umax ptr %addr, i32 1 seq_cst
   ret i32 %oldval
 }
@@ -2234,9 +5330,28 @@ define i32 @test_atomicrmw_umax(ptr %addr) {
 @addr = global ptr null
 
 define void @test_blockaddress() {
-; CHECK-LABEL: name: test_blockaddress
-; CHECK: [[BADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
-; CHECK: G_STORE [[BADDR]](p0)
+  ; CHECK-LABEL: name: test_blockaddress
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @addr
+  ; CHECK-NEXT:   G_STORE [[BLOCK_ADDR]](p0), [[GV]](p0) :: (store (p0) into @addr)
+  ; CHECK-NEXT:   G_BRINDIRECT [[BLOCK_ADDR]](p0)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.block (ir-block-address-taken %ir-block.block):
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_blockaddress
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   successors: %bb.2(0x80000000)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[BLOCK_ADDR:%[0-9]+]]:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+  ; O3-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @addr
+  ; O3-NEXT:   G_STORE [[BLOCK_ADDR]](p0), [[GV]](p0) :: (store (p0) into @addr)
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT: bb.2.block (ir-block-address-taken %ir-block.block):
+  ; O3-NEXT:   RET_ReallyLR
   store ptr blockaddress(@test_blockaddress, %block), ptr @addr
   indirectbr ptr blockaddress(@test_blockaddress, %block), [label %block]
 block:
@@ -2247,9 +5362,17 @@ block:
 declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) readonly nounwind
 declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
 define void @test_invariant_intrin() {
-; CHECK-LABEL: name: test_invariant_intrin
-; CHECK: %{{[0-9]+}}:_(s64) = G_IMPLICIT_DEF
-; CHECK-NEXT: RET_ReallyLR
+  ; CHECK-LABEL: name: test_invariant_intrin
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.x
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_invariant_intrin
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.x
+  ; O3-NEXT:   [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; O3-NEXT:   RET_ReallyLR
   %x = alloca %t
   %inv = call ptr @llvm.invariant.start.p0(i64 8, ptr %x)
   call void @llvm.invariant.end.p0(ptr %inv, i64 8, ptr %x)
@@ -2258,81 +5381,232 @@ define void @test_invariant_intrin() {
 
 declare float @llvm.ceil.f32(float)
 define float @test_ceil_f32(float %x) {
-  ; CHECK-LABEL: name:            test_ceil_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FCEIL %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_ceil_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FCEIL]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_ceil_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FCEIL]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.ceil.f32(float %x)
   ret float %y
 }
 
 declare double @llvm.ceil.f64(double)
 define double @test_ceil_f64(double %x) {
-  ; CHECK-LABEL: name:            test_ceil_f64
-  ; CHECK: %{{[0-9]+}}:_(s64) = G_FCEIL %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_ceil_f64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[COPY]]
+  ; CHECK-NEXT:   $d0 = COPY [[FCEIL]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_ceil_f64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; O3-NEXT:   [[FCEIL:%[0-9]+]]:_(s64) = G_FCEIL [[COPY]]
+  ; O3-NEXT:   $d0 = COPY [[FCEIL]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %y = call double @llvm.ceil.f64(double %x)
   ret double %y
 }
 
 declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
 define <2 x float> @test_ceil_v2f32(<2 x float> %x) {
-  ; CHECK-LABEL: name:            test_ceil_v2f32
-  ; CHECK: %{{[0-9]+}}:_(<2 x s32>) = G_FCEIL %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_ceil_v2f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[FCEIL:%[0-9]+]]:_(<2 x s32>) = G_FCEIL [[COPY]]
+  ; CHECK-NEXT:   $d0 = COPY [[FCEIL]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0
+  ;
+  ; O3-LABEL: name: test_ceil_v2f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $d0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; O3-NEXT:   [[FCEIL:%[0-9]+]]:_(<2 x s32>) = G_FCEIL [[COPY]]
+  ; O3-NEXT:   $d0 = COPY [[FCEIL]](<2 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $d0
   %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
   ret <2 x float> %y
 }
 
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
 define <4 x float> @test_ceil_v4f32(<4 x float> %x) {
-  ; CHECK-LABEL: name:            test_ceil_v4f32
-  ; CHECK: %{{[0-9]+}}:_(<4 x s32>) = G_FCEIL %{{[0-9]+}}
-  ; SELECT: %{{[0-9]+}}:fpr128 = FRINTPv4f32 %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_ceil_v4f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; CHECK-NEXT:   [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[BITCAST]]
+  ; CHECK-NEXT:   $q0 = COPY [[FCEIL]](<4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_ceil_v4f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $q0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; O3-NEXT:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
+  ; O3-NEXT:   [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[BITCAST]]
+  ; O3-NEXT:   $q0 = COPY [[FCEIL]](<4 x s32>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x)
   ret <4 x float> %y
 }
 
 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
 define <2 x double> @test_ceil_v2f64(<2 x double> %x) {
-  ; CHECK-LABEL: name:            test_ceil_v2f64
-  ; CHECK: %{{[0-9]+}}:_(<2 x s64>) = G_FCEIL %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_ceil_v2f64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[FCEIL:%[0-9]+]]:_(<2 x s64>) = G_FCEIL [[COPY]]
+  ; CHECK-NEXT:   $q0 = COPY [[FCEIL]](<2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0
+  ;
+  ; O3-LABEL: name: test_ceil_v2f64
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $q0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; O3-NEXT:   [[FCEIL:%[0-9]+]]:_(<2 x s64>) = G_FCEIL [[COPY]]
+  ; O3-NEXT:   $q0 = COPY [[FCEIL]](<2 x s64>)
+  ; O3-NEXT:   RET_ReallyLR implicit $q0
   %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x)
   ret <2 x double> %y
 }
 
 declare float @llvm.cos.f32(float)
 define float @test_cos_f32(float %x) {
-  ; CHECK-LABEL: name:            test_cos_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FCOS %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_cos_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FCOS:%[0-9]+]]:_(s32) = G_FCOS [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FCOS]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_cos_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FCOS:%[0-9]+]]:_(s32) = G_FCOS [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FCOS]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.cos.f32(float %x)
   ret float %y
 }
 
 declare float @llvm.sin.f32(float)
 define float @test_sin_f32(float %x) {
-  ; CHECK-LABEL: name:            test_sin_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FSIN %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_sin_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FSIN:%[0-9]+]]:_(s32) = G_FSIN [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FSIN]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_sin_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FSIN:%[0-9]+]]:_(s32) = G_FSIN [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FSIN]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.sin.f32(float %x)
   ret float %y
 }
 
 declare float @llvm.sqrt.f32(float)
 define float @test_sqrt_f32(float %x) {
-  ; CHECK-LABEL: name:            test_sqrt_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FSQRT %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_sqrt_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FSQRT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_sqrt_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FSQRT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.sqrt.f32(float %x)
   ret float %y
 }
 
 declare float @llvm.floor.f32(float)
 define float @test_floor_f32(float %x) {
-  ; CHECK-LABEL: name:            test_floor_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FFLOOR %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_floor_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FFLOOR]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_floor_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FFLOOR]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.floor.f32(float %x)
   ret float %y
 }
 
 declare float @llvm.nearbyint.f32(float)
 define float @test_nearbyint_f32(float %x) {
-  ; CHECK-LABEL: name:            test_nearbyint_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FNEARBYINT %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_nearbyint_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FNEARBYINT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_nearbyint_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FNEARBYINT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.nearbyint.f32(float %x)
   ret float %y
 }
@@ -2340,6 +5614,21 @@ define float @test_nearbyint_f32(float %x) {
 ; CHECK-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32
 ; CHECK: %1:_(<4 x s32>), %2:_(<4 x s32>), %3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), %0(p0) :: (load (s384) from %ir.ptr, align 64)
 define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(ptr %ptr) {
+  ; CHECK-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(<4 x s32>), [[INT1:%[0-9]+]]:_(<4 x s32>), [[INT2:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), [[COPY]](p0) :: (load (s384) from %ir.ptr, align 64)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_llvm.aarch64.neon.ld3.v4i32.p0i32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[INT:%[0-9]+]]:_(<4 x s32>), [[INT1:%[0-9]+]]:_(<4 x s32>), [[INT2:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.ld3), [[COPY]](p0) :: (load (s384) from %ir.ptr, align 64)
+  ; O3-NEXT:   RET_ReallyLR
   %arst = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr)
   ret void
 }
@@ -2347,11 +5636,33 @@ define void @test_llvm.aarch64.neon.ld3.v4i32.p0i32(ptr %ptr) {
 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr) #3
 
 define void @test_i1_arg_zext(ptr %f) {
-; CHECK-LABEL: name: test_i1_arg_zext
-; CHECK: [[I1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-; CHECK: [[ZEXT0:%[0-9]+]]:_(s8) = G_ZEXT [[I1]](s1)
-; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT0]](s8)
-; CHECK: $w0 = COPY [[ZEXT1]](s32)
+  ; CHECK-LABEL: name: test_i1_arg_zext
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; CHECK-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   BLR [[COPY]](p0), csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_i1_arg_zext
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+  ; O3-NEXT:   [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[C]](s1)
+  ; O3-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; O3-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8)
+  ; O3-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; O3-NEXT:   BLR [[COPY]](p0), csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+  ; O3-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; O3-NEXT:   RET_ReallyLR
   call void %f(i1 true)
   ret void
 }
@@ -2360,9 +5671,16 @@ declare ptr @llvm.stacksave()
 declare void @llvm.stackrestore(ptr)
 define void @test_stacksaverestore() {
   ; CHECK-LABEL: name: test_stacksaverestore
-  ; CHECK: [[SAVE:%[0-9]+]]:_(p0) = G_STACKSAVE
-  ; CHECK-NEXT: G_STACKRESTORE [[SAVE]]
-  ; CHECK-NEXT: RET_ReallyLR
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[STACKSAVE:%[0-9]+]]:_(p0) = G_STACKSAVE
+  ; CHECK-NEXT:   G_STACKRESTORE [[STACKSAVE]](p0)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_stacksaverestore
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[STACKSAVE:%[0-9]+]]:_(p0) = G_STACKSAVE
+  ; O3-NEXT:   G_STACKRESTORE [[STACKSAVE]](p0)
+  ; O3-NEXT:   RET_ReallyLR
   %sp = call ptr @llvm.stacksave()
   call void @llvm.stackrestore(ptr %sp)
   ret void
@@ -2370,27 +5688,70 @@ define void @test_stacksaverestore() {
 
 declare float @llvm.rint.f32(float)
 define float @test_rint_f32(float %x) {
-  ; CHECK-LABEL: name:            test_rint_f32
-  ; CHECK: %{{[0-9]+}}:_(s32) = G_FRINT %{{[0-9]+}}
+  ; CHECK-LABEL: name: test_rint_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FRINT]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
+  ;
+  ; O3-LABEL: name: test_rint_f32
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[COPY]]
+  ; O3-NEXT:   $s0 = COPY [[FRINT]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $s0
   %y = call float @llvm.rint.f32(float %x)
   ret float %y
 }
 
 declare void @llvm.assume(i1)
 define void @test_assume(i1 %x) {
-  ; CHECK-LABEL: name:            test_assume
-  ; CHECK-NOT: llvm.assume
-  ; CHECK: RET_ReallyLR
+  ; CHECK-LABEL: name: test_assume
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; CHECK-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; CHECK-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_assume
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; O3-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+  ; O3-NEXT:   [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+  ; O3-NEXT:   [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.assume(i1 %x)
   ret void
 }
 
 declare void @llvm.experimental.noalias.scope.decl(metadata)
 define void @test.llvm.noalias.scope.decl(ptr %P, ptr %Q) nounwind ssp {
-  tail call void @llvm.experimental.noalias.scope.decl(metadata !3)
   ; CHECK-LABEL: name: test.llvm.noalias.scope.decl
-  ; CHECK-NOT: llvm.experimental.noalias.scope.decl
-  ; CHECK: RET_ReallyLR
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test.llvm.noalias.scope.decl
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0, $x1
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   RET_ReallyLR
+  tail call void @llvm.experimental.noalias.scope.decl(metadata !3)
   ret void
 }
 
@@ -2401,55 +5762,113 @@ define void @test.llvm.noalias.scope.decl(ptr %P, ptr %Q) nounwind ssp {
 
 declare void @llvm.sideeffect()
 define void @test_sideeffect() {
-  ; CHECK-LABEL: name:            test_sideeffect
-  ; CHECK-NOT: llvm.sideeffect
-  ; CHECK: RET_ReallyLR
+  ; CHECK-LABEL: name: test_sideeffect
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_sideeffect
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.sideeffect()
   ret void
 }
 
 declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
 define void @test_var_annotation(ptr, ptr, ptr, i32) {
-  ; CHECK-LABEL: name:            test_var_annotation
-  ; CHECK-NOT: llvm.var.annotation
-  ; CHECK: RET_ReallyLR
+  ; CHECK-LABEL: name: test_var_annotation
+  ; CHECK: bb.1 (%ir-block.4):
+  ; CHECK-NEXT:   liveins: $w3, $x0, $x1, $x2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+  ; CHECK-NEXT:   RET_ReallyLR
+  ;
+  ; O3-LABEL: name: test_var_annotation
+  ; O3: bb.1 (%ir-block.4):
+  ; O3-NEXT:   liveins: $w3, $x0, $x1, $x2
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $x2
+  ; O3-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+  ; O3-NEXT:   RET_ReallyLR
   call void @llvm.var.annotation(ptr %0, ptr %1, ptr %2, i32 %3, ptr null)
   ret void
 }
 
 declare i64 @llvm.readcyclecounter()
 define i64 @test_readcyclecounter() {
-  ; CHECK-LABEL: name:            test_readcyclecounter
-  ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_READCYCLECOUNTER{{$}}
-  ; CHECK-NEXT: $x0 = COPY [[RES]]
-  ; CHECK-NEXT: RET_ReallyLR implicit $x0
+  ; CHECK-LABEL: name: test_readcyclecounter
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   [[READCYCLECOUNTER:%[0-9]+]]:_(s64) = G_READCYCLECOUNTER
+  ; CHECK-NEXT:   $x0 = COPY [[READCYCLECOUNTER]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_readcyclecounter
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   [[READCYCLECOUNTER:%[0-9]+]]:_(s64) = G_READCYCLECOUNTER
+  ; O3-NEXT:   $x0 = COPY [[READCYCLECOUNTER]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = call i64 @llvm.readcyclecounter()
   ret i64 %res
 }
 
 define i64 @test_freeze(i64 %a) {
-  ; CHECK-LABEL: name:            test_freeze
-  ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-  ; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
-  ; CHECK-NEXT: $x0 = COPY [[RES]]
-  ; CHECK-NEXT: RET_ReallyLR implicit $x0
+  ; CHECK-LABEL: name: test_freeze
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; CHECK-NEXT:   [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+  ; CHECK-NEXT:   $x0 = COPY [[FREEZE]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: test_freeze
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+  ; O3-NEXT:   [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+  ; O3-NEXT:   $x0 = COPY [[FREEZE]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %res = freeze i64 %a
   ret i64 %res
 }
 
 define {i8, i32} @test_freeze_struct(ptr %addr) {
-  ; CHECK-LABEL: name:            test_freeze_struct
-  ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0)
-  ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-  ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]]
-  ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0)
-  ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[LOAD]]
-  ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[LOAD1]]
-  ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]]
-  ; CHECK-NEXT: $w0 = COPY [[ANYEXT]]
-  ; CHECK-NEXT: $w1 = COPY [[FREEZE1]]
-  ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+  ; CHECK-LABEL: name: test_freeze_struct
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.addr + 4)
+  ; CHECK-NEXT:   [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[LOAD]]
+  ; CHECK-NEXT:   [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[LOAD1]]
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s8)
+  ; CHECK-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; CHECK-NEXT:   $w1 = COPY [[FREEZE1]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0, implicit $w1
+  ;
+  ; O3-LABEL: name: test_freeze_struct
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr, align 4)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir.addr + 4)
+  ; O3-NEXT:   [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[LOAD]]
+  ; O3-NEXT:   [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[LOAD1]]
+  ; O3-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s8)
+  ; O3-NEXT:   $w0 = COPY [[ANYEXT]](s32)
+  ; O3-NEXT:   $w1 = COPY [[FREEZE1]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0, implicit $w1
   %load = load { i8, i32 }, ptr %addr
   %res = freeze {i8, i32} %load
   ret {i8, i32} %res
@@ -2461,11 +5880,21 @@ declare i64 @llvm.lround.i64.f32(float) nounwind readnone
 define i64 @lround(float %x) {
   ; CHECK-LABEL: name: lround
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $s0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
-  ; CHECK:   [[LROUND:%[0-9]+]]:_(s64) = G_LROUND [[COPY]](s32)
-  ; CHECK:   $x0 = COPY [[LROUND]](s64)
-  ; CHECK:   RET_ReallyLR implicit $x0
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[LROUND:%[0-9]+]]:_(s64) = G_LROUND [[COPY]](s32)
+  ; CHECK-NEXT:   $x0 = COPY [[LROUND]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: lround
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[LROUND:%[0-9]+]]:_(s64) = G_LROUND [[COPY]](s32)
+  ; O3-NEXT:   $x0 = COPY [[LROUND]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %lround = tail call i64 @llvm.lround.i64.f32(float %x)
   ret i64 %lround
 }
@@ -2474,11 +5903,217 @@ declare i64 @llvm.llround.i64.f32(float) nounwind readnone
 define i64 @llround(float %x) {
   ; CHECK-LABEL: name: llround
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $s0
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
-  ; CHECK:   [[LLROUND:%[0-9]+]]:_(s64) = G_LLROUND [[COPY]](s32)
-  ; CHECK:   $x0 = COPY [[LLROUND]](s64)
-  ; CHECK:   RET_ReallyLR implicit $x0
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[LLROUND:%[0-9]+]]:_(s64) = G_LLROUND [[COPY]](s32)
+  ; CHECK-NEXT:   $x0 = COPY [[LLROUND]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  ;
+  ; O3-LABEL: name: llround
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $s0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; O3-NEXT:   [[LLROUND:%[0-9]+]]:_(s64) = G_LLROUND [[COPY]](s32)
+  ; O3-NEXT:   $x0 = COPY [[LLROUND]](s64)
+  ; O3-NEXT:   RET_ReallyLR implicit $x0
   %lround = tail call i64 @llvm.llround.i64.f32(float %x)
   ret i64 %lround
 }
+
+define i32 @gep_nusw_nuw(ptr %ptr, i32 %idx) {
+  ; CHECK-LABEL: name: gep_nusw_nuw
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; CHECK-NEXT:   [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw nsw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: gep_nusw_nuw
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; O3-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw nsw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
+  %sidx = sext i32 %idx to i64
+  %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
+  %v1 = load i32, ptr %gep1
+  %gep2 = getelementptr nusw nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1
+  %v2 = load i32, ptr %gep2
+  %res = add i32 %v1, %v2
+  ret i32 %res
+ }
+
+define i32 @gep_nuw(ptr %ptr, i32 %idx) {
+  ; CHECK-LABEL: name: gep_nuw
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; CHECK-NEXT:   [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: gep_nuw
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; O3-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
+  %sidx = sext i32 %idx to i64
+  %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
+  %v1 = load i32, ptr %gep1
+  %gep2 = getelementptr nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1
+  %v2 = load i32, ptr %gep2
+  %res = add i32 %v1, %v2
+  ret i32 %res
+ }
+
+define i32 @gep_nusw(ptr %ptr, i32 %idx) {
+  ; CHECK-LABEL: name: gep_nusw
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; CHECK-NEXT:   [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = nsw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: gep_nusw
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; O3-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = nsw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
+  %sidx = sext i32 %idx to i64
+  %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
+  %v1 = load i32, ptr %gep1
+  %gep2 = getelementptr nusw [4 x i32], ptr %ptr, i64 %sidx, i64 1
+  %v2 = load i32, ptr %gep2
+  %res = add i32 %v1, %v2
+  ret i32 %res
+ }
+
+define i32 @gep_none(ptr %ptr, i32 %idx) {
+  ; CHECK-LABEL: name: gep_none
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $w1, $x0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; CHECK-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; CHECK-NEXT:   [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; CHECK-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
+  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; CHECK-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+  ; CHECK-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
+  ; CHECK-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; CHECK-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w0
+  ;
+  ; O3-LABEL: name: gep_none
+  ; O3: bb.1 (%ir-block.0):
+  ; O3-NEXT:   liveins: $w1, $x0
+  ; O3-NEXT: {{  $}}
+  ; O3-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; O3-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; O3-NEXT:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; O3-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+  ; O3-NEXT:   [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
+  ; O3-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
+  ; O3-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; O3-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
+  ; O3-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; O3-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+  ; O3-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2)
+  ; O3-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
+  ; O3-NEXT:   $w0 = COPY [[ADD]](s32)
+  ; O3-NEXT:   RET_ReallyLR implicit $w0
+  %sidx = sext i32 %idx to i64
+  %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
+  %v1 = load i32, ptr %gep1
+  %gep2 = getelementptr [4 x i32], ptr %ptr, i64 %sidx, i64 1
+  %v2 = load i32, ptr %gep2
+  %res = add i32 %v1, %v2
+  ret i32 %res
+ }

>From c4c1484f32366cca12be94cba76e24f71e1778ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 30 May 2024 22:10:11 +0200
Subject: [PATCH 2/3] add nusw

---
 llvm/include/llvm/CodeGen/MachineInstr.h | 2 ++
 llvm/lib/CodeGen/MIRParser/MILexer.cpp   | 1 +
 llvm/lib/CodeGen/MIRParser/MILexer.h     | 1 +
 llvm/lib/CodeGen/MIRPrinter.cpp          | 2 ++
 llvm/lib/CodeGen/MachineInstr.cpp        | 2 +-
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index db48a0ae55145..b3cb5c8b84839 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -117,6 +117,8 @@ class MachineInstr
     NoConvergent = 1 << 17,  // Call does not require convergence guarantees.
     NonNeg = 1 << 18,        // The operand is non-negative.
     Disjoint = 1 << 19,      // Each bit is zero in at least one of the inputs.
+    NoUSWrap = 1 << 20,      // GEP-derived instruction carries the
+                             // no-unsigned-signed-wrap (nusw) flag.
   };
 
 private:
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 7bb2165532047..114f0e8a57108 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -212,6 +212,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("reassoc", MIToken::kw_reassoc)
       .Case("nuw", MIToken::kw_nuw)
       .Case("nsw", MIToken::kw_nsw)
+      .Case("nusw", MIToken::kw_nusw)
       .Case("exact", MIToken::kw_exact)
       .Case("nneg", MIToken::kw_nneg)
       .Case("disjoint", MIToken::kw_disjoint)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 6617ec68e9415..49a19896804ee 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -69,6 +69,7 @@ struct MIToken {
     kw_contract,
     kw_afn,
     kw_reassoc,
+    kw_nusw,
     kw_nuw,
     kw_nsw,
     kw_exact,
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 26d534f369ae5..49993f7381ec7 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -813,6 +813,8 @@ void MIPrinter::print(const MachineInstr &MI) {
     OS << "nneg ";
   if (MI.getFlag(MachineInstr::Disjoint))
     OS << "disjoint ";
+  if (MI.getFlag(MachineInstr::NoUSWrap))
+    OS << "nusw ";
 
   OS << TII->getName(MI.getOpcode());
   if (I < E)
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index b3c0abe4688eb..198af9339c159 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -578,7 +578,7 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
       MIFlags |= MachineInstr::MIFlag::NoUWrap;
   } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
     if (GEP->hasNoUnsignedSignedWrap())
-      MIFlags |= MachineInstr::MIFlag::NoSWrap;
+      MIFlags |= MachineInstr::MIFlag::NoUSWrap;
     if (GEP->hasNoUnsignedWrap())
       MIFlags |= MachineInstr::MIFlag::NoUWrap;
   }

>From ade2dbe7f9539ecdd02033b1abb7b2b4aca333e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 30 May 2024 22:41:51 +0200
Subject: [PATCH 3/3] update poison flags

---
 .../llvm/CodeGen/GlobalISel/GenericMachineInstrs.h        | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 2b3efc3b609f0..70d959621403a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -36,13 +36,13 @@ class GenericMachineInstr : public MachineInstr {
   }
 
   bool hasPoisonGeneratingFlags() const {
-    return getFlags() & (NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg |
-                         FmNoNans | FmNoInfs);
+    return getFlags() & (NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint |
+                         NonNeg | FmNoNans | FmNoInfs);
   }
 
   void dropPoisonGeneratingFlags() {
-    clearFlags(NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans |
-               FmNoInfs);
+    clearFlags(NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint | NonNeg |
+               FmNoNans | FmNoInfs);
     assert(!hasPoisonGeneratingFlags());
   }
 };



More information about the llvm-commits mailing list