[llvm] [CGP] Eliminate noop bitcasts (PR #146961)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 3 14:09:07 PDT 2025
https://github.com/preames created https://github.com/llvm/llvm-project/pull/146961
The change itself is very straightforward; it's mostly just updating tests.
Somewhat surprisingly, this does cause some minor codegen differences in a few tests.
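For context, a "noop" bitcast here is one whose source and destination types
are identical (e.g. a ptr-to-ptr bitcast left over from an earlier transform).
A minimal sketch of the kind of IR this cleans up (hypothetical example, not
taken from the patch):

  define i32 @example(ptr %p) {
    %q = bitcast ptr %p to ptr    ; no-op: source and destination types match
    %v = load i32, ptr %q
    ret i32 %v
  }

With this patch, CodeGenPrepare replaces all uses of %q with %p and erases the
bitcast, so later passes (and the MachineMemOperands printed in MIR) see the
original value names. That's why most of the test churn below is renames such
as %ir.p0 becoming %ir.p.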
From 902914a24ebf1ec252c124901b0a51cadd82ae38 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 3 Jul 2025 11:21:36 -0700
Subject: [PATCH] [CGP] Eliminate noop bitcasts
The change itself is very straightforward; it's mostly just updating tests.
Somewhat surprisingly, this does cause some minor codegen differences in
a few tests.
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 7 +
llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll | 20 +-
.../CodeGen/AArch64/merge-scoped-aa-store.ll | 4 +-
.../irtranslator-call-return-values.ll | 4 +-
.../AMDGPU/GlobalISel/irtranslator-call.ll | 4 +-
.../GlobalISel/irtranslator-indirect-call.ll | 2 +-
.../GlobalISel/irtranslator-sibling-call.ll | 2 +-
.../branch-folding-implicit-def-subreg.ll | 4 +-
...al-regcopy-and-spill-missed-at-regalloc.ll | 8 +-
.../CodeGen/AMDGPU/spill-vector-superclass.ll | 2 +-
llvm/test/CodeGen/SystemZ/isel-debug.ll | 2 +-
llvm/test/CodeGen/Thumb2/active_lane_mask.ll | 5 +-
.../x86_64-irtranslator-struct-return.ll | 54 ++--
llvm/test/CodeGen/X86/memcpy-scoped-aa.ll | 36 +--
.../X86/merge-store-partially-alias-loads.ll | 2 +-
llvm/test/CodeGen/X86/pr44140.ll | 12 +-
.../CodeGen/X86/sse-intrinsics-fast-isel.ll | 280 ++++++++----------
.../statepoint-cmp-sunk-past-statepoint.ll | 8 +-
.../codegenprepare/sunk-addr.ll | 3 +-
llvm/test/DebugInfo/X86/bitcast-di.ll | 35 +--
.../X86/masked-gather-struct-gep.ll | 5 +-
.../CodeGenPrepare/X86/sink-addr-recreate.ll | 11 +-
.../CodeGenPrepare/X86/sink-addrmode-base.ll | 63 ++--
.../X86/sink-addrmode-two-phi.ll | 3 +-
24 files changed, 260 insertions(+), 316 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9bbb89e37865d..bb397c1c35bf8 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8720,6 +8720,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (isa<Constant>(CI->getOperand(0)))
return AnyChange;
+ // Remove noop bitcasts
+ if (isa<BitCastInst>(I) && I->getType() == I->getOperand(0)->getType()) {
+ replaceAllUsesWith(I, I->getOperand(0), FreshBBs, IsHugeFunc);
+ I->eraseFromParent();
+ return true;
+ }
+
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
return true;
diff --git a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
index 7dde168024278..f77d300adb2c7 100644
--- a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
@@ -9,8 +9,8 @@
; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
; MIR-LABEL: name: test_memcpy
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memcpy:
; CHECK: // %bb.0:
@@ -32,8 +32,8 @@ define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
}
; MIR-LABEL: name: test_memcpy_inline
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memcpy_inline:
; CHECK: // %bb.0:
@@ -55,8 +55,8 @@ define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
}
; MIR-LABEL: name: test_memmove
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memmove:
; CHECK: // %bb.0:
@@ -79,8 +79,8 @@ define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
; MIR-LABEL: name: test_memset
; MIR: %2:gpr64 = MOVi64imm -6148914691236517206
-; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_memset:
; CHECK: // %bb.0:
@@ -100,8 +100,8 @@ define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
}
; MIR-LABEL: name: test_mempcpy
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
define i32 @test_mempcpy(ptr nocapture %p, ptr nocapture readonly %q) {
; CHECK-LABEL: test_mempcpy:
; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll b/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
index 6b2a8c5aaf8b8..42fda4b4309af 100644
--- a/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
@@ -21,7 +21,7 @@ define void @blam0(ptr %g0, ptr %g1) {
; MIR-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; MIR-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (s64) from %ir.g0, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.tmp41, align 4, !alias.scope !3, !noalias !0)
+ ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.g1, align 4, !alias.scope !3, !noalias !0)
; MIR-NEXT: RET_ReallyLR
%tmp4 = getelementptr inbounds <3 x float>, ptr %g1, i64 0, i64 0
%tmp5 = load <3 x float>, ptr %g0, align 4, !alias.scope !0, !noalias !1
@@ -43,7 +43,7 @@ define void @blam1(ptr %g0, ptr %g1) {
; MIR-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; MIR-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (s64) from %ir.g0, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.tmp41, align 4, !alias.scope !9, !noalias !10)
+ ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.g1, align 4, !alias.scope !9, !noalias !10)
; MIR-NEXT: RET_ReallyLR
%tmp4 = getelementptr inbounds <3 x float>, ptr %g1, i64 0, i64 0
%tmp5 = load <3 x float>, ptr %g0, align 4, !alias.scope !0, !noalias !1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 96ee15f2eb78b..c7977c339880d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4)
+ ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.test_call_external_i32_func_i32_imm.kernarg.segment, align 16, addrspace 4)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -2957,7 +2957,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
+ ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.test_call_external_v33i32_func_v33i32_i32.kernarg.segment, align 16, addrspace 4)
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; GCN-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %18(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 92106d7e1d60d..862093ebf9416 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1350,7 +1350,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.test_call_external_void_func_p0_imm.kernarg.segment, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4654,7 +4654,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.stack_passed_arg_alignment_v32i32_f64.kernarg.segment, align 16, addrspace 4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; CHECK-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %18(p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index ac0d5ee78666e..bdf89f785ef99 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.test_indirect_call_sgpr_ptr.kernarg.segment, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index ca580d8f29c84..a3f5d927d4b80 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -140,7 +140,7 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.a.kernarg.offset1, align 16, addrspace 4)
+ ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.kernel_call_i32_fastcc_i32_i32_unused_result.kernarg.segment, align 16, addrspace 4)
; GCN-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32)
; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index ae90cfb631e8d..412e3569c0d05 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -16,8 +16,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.f1.kernarg.segment, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.f1.kernarg.segment + 16, align 16, addrspace 4)
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 663fd98b46bf7..03d3724c232fa 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %7
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %8
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
- ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+ ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -40,7 +40,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
- ; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+ ; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
; PEI-GFX908-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; PEI-GFX908-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
@@ -60,7 +60,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %7
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %8
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
- ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+ ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -83,7 +83,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
- ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+ ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
; PEI-GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; PEI-GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index bd255e88b9512..5f1444be050b6 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
; GCN: bb.0 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.test_spill_av_class.kernarg.segment, addrspace 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
diff --git a/llvm/test/CodeGen/SystemZ/isel-debug.ll b/llvm/test/CodeGen/SystemZ/isel-debug.ll
index 120a0e08ae9fa..753e29cbfaf54 100644
--- a/llvm/test/CodeGen/SystemZ/isel-debug.ll
+++ b/llvm/test/CodeGen/SystemZ/isel-debug.ll
@@ -5,7 +5,7 @@
;
; Check that some debug output is printed without problems.
; CHECK: SystemZAddressingMode
-; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.0)>
+; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.ptr)>
; CHECK: Index
; CHECK: Disp
diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index bcd92f81911b2..61837714fc82c 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -299,15 +299,14 @@ define void @test_width2(ptr nocapture readnone %x, ptr nocapture %y, i8 zeroext
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: rsbs r3, r3, #0
; CHECK-NEXT: bfi r0, r12, #0, #1
-; CHECK-NEXT: sub.w r12, r1, #8
; CHECK-NEXT: bfi r0, r3, #1, #1
; CHECK-NEXT: lsls r3, r0, #31
; CHECK-NEXT: itt ne
-; CHECK-NEXT: ldrne.w r3, [r12]
+; CHECK-NEXT: ldrne r3, [r1, #-8]
; CHECK-NEXT: vmovne.32 q0[0], r3
; CHECK-NEXT: lsls r0, r0, #30
; CHECK-NEXT: itt mi
-; CHECK-NEXT: ldrmi.w r0, [r12, #4]
+; CHECK-NEXT: ldrmi r0, [r1, #-4]
; CHECK-NEXT: vmovmi.32 q0[2], r0
; CHECK-NEXT: vmrs r3, p0
; CHECK-NEXT: and r0, r3, #1
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
index 171ccb287f2b9..49977b488d84a 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -18,9 +18,9 @@ define float @test_return_f1(float %f.coerce) {
; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f
- ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
- ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13)
+ ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.f)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.f, align 4)
+ ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.retval)
; ALL-NEXT: $xmm0 = COPY [[LOAD]](s32)
; ALL-NEXT: RET 0, implicit $xmm0
entry:
@@ -47,9 +47,9 @@ define double @test_return_d1(double %d.coerce) {
; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
- ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.coerce.dive2)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 8), (load (s8) from %ir.1, align 8)
- ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.coerce.dive13)
+ ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.d)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 8), (load (s8) from %ir.d, align 8)
+ ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval)
; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64)
; ALL-NEXT: RET 0, implicit $xmm0
entry:
@@ -75,14 +75,14 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1)
; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
- ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1)
+ ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.d)
; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
- ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
- ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5)
+ ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.0)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 8), (load (s8) from %ir.d, align 8)
+ ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval)
; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
- ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8)
+ ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.retval + 8)
; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64)
; ALL-NEXT: $xmm1 = COPY [[LOAD1]](s64)
; ALL-NEXT: RET 0, implicit $xmm0, implicit $xmm1
@@ -111,9 +111,9 @@ define i32 @test_return_i1(i32 %i.coerce) {
; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
- ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
- ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13)
+ ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.i)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+ ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.retval)
; ALL-NEXT: $eax = COPY [[LOAD]](s32)
; ALL-NEXT: RET 0, implicit $eax
entry:
@@ -138,9 +138,9 @@ define i64 @test_return_i2(i64 %i.coerce) {
; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
- ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.0, align 4)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 4), (load (s8) from %ir.2, align 4)
- ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.3, align 4)
+ ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.i, align 4)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+ ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval, align 4)
; ALL-NEXT: $rax = COPY [[LOAD]](s64)
; ALL-NEXT: RET 0, implicit $rax
entry:
@@ -168,13 +168,13 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) {
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
; ALL-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.coerce
; ALL-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp
- ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4)
+ ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.coerce, align 4)
; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL-NEXT: %7:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
- ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.1)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
+ ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.0)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.i, align 4), (load (s8) from %ir.coerce, align 4)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.tmp, align 8), (load (s8) from %ir.retval, align 4)
; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
@@ -213,14 +213,14 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) {
; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
- ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4)
+ ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.i, align 4)
; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
- ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2, align 4)
- ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
- ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
+ ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.0, align 4)
+ ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+ ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval, align 4)
; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
- ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
+ ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.retval + 8, align 4)
; ALL-NEXT: $rax = COPY [[LOAD]](s64)
; ALL-NEXT: $rdx = COPY [[LOAD1]](s64)
; ALL-NEXT: RET 0, implicit $rax, implicit $rdx
diff --git a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
index d3b86786a630c..2df4fedeb6770 100644
--- a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
@@ -18,10 +18,10 @@ define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
; MIR-NEXT: {{ $}}
; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
- ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -44,10 +44,10 @@ define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
; MIR-NEXT: {{ $}}
; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
- ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -70,10 +70,10 @@ define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
; MIR-NEXT: {{ $}}
; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
- ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -97,8 +97,8 @@ define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
; MIR-NEXT: [[MOV64ri:%[0-9]+]]:gr64 = MOV64ri -6148914691236517206
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -119,10 +119,10 @@ define i32 @test_mempcpy(ptr nocapture %p, ptr nocapture readonly %q) {
; MIR-NEXT: {{ $}}
; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
- ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 1, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 1, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 1, !alias.scope !0, !noalias !3)
- ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 1, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 1, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 1, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 1, !alias.scope !0, !noalias !3)
+ ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 1, !alias.scope !0, !noalias !3)
; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
; MIR-NEXT: $eax = COPY [[ADD32rm]]
diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
index c1fdd71c04948..fc2737b3d20b1 100644
--- a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
+++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
@@ -18,7 +18,7 @@
; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]],
; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add {{(nuw )?}}[[BASEPTR]], Constant:i64<2>
-; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<(load (s16) from %ir.tmp81, align 1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
+; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<(load (s16) from %ir.tmp, align 1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<(load (s8) from %ir.tmp12)> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64
; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<(store (s8) into %ir.tmp14)> [[ENTRYTOKEN]], [[LD1]], t{{[0-9]+}}, undef:i64
diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll
index 02525d73a786d..82dc0f7b853a4 100644
--- a/llvm/test/CodeGen/X86/pr44140.ll
+++ b/llvm/test/CodeGen/X86/pr44140.ll
@@ -22,22 +22,22 @@ define i32 @main() {
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3
; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5
; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm4
+; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 2e2e78a6da51e..632205c8a3bc9 100644
--- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -934,8 +934,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-SSE-NEXT: # imm = 0x1F80
@@ -945,8 +944,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X86-AVX-NEXT: # imm = 0x1F80
@@ -955,8 +953,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-SSE-NEXT: # imm = 0x1F80
@@ -964,8 +961,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
; X64-AVX-NEXT: # imm = 0x1F80
@@ -983,8 +979,7 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
@@ -993,8 +988,7 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
@@ -1002,16 +996,14 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
; X64-AVX-NEXT: retq # encoding: [0xc3]
@@ -1027,8 +1019,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-SSE-NEXT: # imm = 0x8000
@@ -1038,8 +1029,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X86-AVX-NEXT: # imm = 0x8000
@@ -1048,8 +1038,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
;
; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-SSE-NEXT: # imm = 0x8000
@@ -1057,8 +1046,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
;
; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
; X64-AVX-NEXT: # imm = 0x8000
@@ -1075,8 +1063,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-SSE-NEXT: # imm = 0x6000
@@ -1086,8 +1073,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X86-AVX-NEXT: # imm = 0x6000
@@ -1096,8 +1082,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
;
; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-SSE-NEXT: # imm = 0x6000
@@ -1105,8 +1090,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
;
; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
; X64-AVX-NEXT: # imm = 0x6000
@@ -1123,8 +1107,7 @@ define i32 @test_mm_getcsr() nounwind {
; X86-SSE-LABEL: test_mm_getcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
; X86-SSE-NEXT: retl # encoding: [0xc3]
@@ -1132,23 +1115,20 @@ define i32 @test_mm_getcsr() nounwind {
; X86-AVX-LABEL: test_mm_getcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_getcsr:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_getcsr:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%1 = alloca i32, align 4
@@ -1813,14 +1793,13 @@ define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xE07F
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
@@ -1828,39 +1807,36 @@ define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xE07F
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-8065, %eax # encoding: [0x25,0x7f,0xe0,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xE07F
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-8065, %eax # encoding: [0x25,0x7f,0xe0,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xE07F
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%1 = alloca i32, align 4
%2 = bitcast ptr %1 to ptr
@@ -1879,13 +1855,12 @@ define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
@@ -1893,36 +1868,33 @@ define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-64, %eax # encoding: [0x83,0xe0,0xc0]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-64, %eax # encoding: [0x83,0xe0,0xc0]
+; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%1 = alloca i32, align 4
%2 = bitcast ptr %1 to ptr
@@ -1940,14 +1912,13 @@ define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0xFFFF7FFF
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
@@ -1955,39 +1926,36 @@ define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0xFFFF7FFF
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-32769, %eax # encoding: [0x25,0xff,0x7f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0xFFFF7FFF
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-32769, %eax # encoding: [0x25,0xff,0x7f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0xFFFF7FFF
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%1 = alloca i32, align 4
%2 = bitcast ptr %1 to ptr
@@ -2144,14 +2112,13 @@ define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X86-SSE-NEXT: # imm = 0x9FFF
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
@@ -2159,39 +2126,36 @@ define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
; X86-AVX-NEXT: # imm = 0x9FFF
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
; X86-AVX-NEXT: popl %eax # encoding: [0x58]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-24577, %eax # encoding: [0x25,0xff,0x9f,0xff,0xff]
; X64-SSE-NEXT: # imm = 0x9FFF
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-24577, %eax # encoding: [0x25,0xff,0x9f,0xff,0xff]
; X64-AVX-NEXT: # imm = 0x9FFF
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%1 = alloca i32, align 4
%2 = bitcast ptr %1 to ptr
@@ -2303,28 +2267,24 @@ define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
define void @test_mm_setcsr(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_setcsr:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
-; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10]
+; X86-SSE-NEXT: ldmxcsr {{[0-9]+}}(%esp) # encoding: [0x0f,0xae,0x54,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX-LABEL: test_mm_setcsr:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04]
-; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X86-AVX-NEXT: vldmxcsr {{[0-9]+}}(%esp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0x04]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_setcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mm_setcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc]
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
; X64-AVX-NEXT: retq # encoding: [0xc3]
%st = alloca i32, align 4
store i32 %a0, ptr %st, align 4
diff --git a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
index 8d589c519eff2..0fcf433131745 100644
--- a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
+++ b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll
@@ -99,12 +99,12 @@ define void @test2(ptr addrspace(1) %this, i32 %0, ptr addrspace(1) %p0, ptr add
; CHECK-LV-NEXT: successors: %bb.3(0x7ffff800), %bb.7(0x00000800)
; CHECK-LV-NEXT: {{ $}}
; CHECK-LV-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-LV-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %11:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-LV-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %10:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
; CHECK-LV-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-LV-NEXT: EH_LABEL <mcsymbol >
; CHECK-LV-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-LV-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY [[COPY4]]
- ; CHECK-LV-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %13:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-LV-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %12:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
; CHECK-LV-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-LV-NEXT: EH_LABEL <mcsymbol >
; CHECK-LV-NEXT: JMP_1 %bb.3
@@ -165,12 +165,12 @@ define void @test2(ptr addrspace(1) %this, i32 %0, ptr addrspace(1) %p0, ptr add
; CHECK-LIS-NEXT: successors: %bb.3(0x7ffff800), %bb.7(0x00000800)
; CHECK-LIS-NEXT: {{ $}}
; CHECK-LIS-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
- ; CHECK-LIS-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %11:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-LIS-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %10:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
; CHECK-LIS-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-LIS-NEXT: EH_LABEL <mcsymbol >
; CHECK-LIS-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-LIS-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY [[COPY4]]
- ; CHECK-LIS-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %13:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
+ ; CHECK-LIS-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %12:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax
; CHECK-LIS-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-LIS-NEXT: EH_LABEL <mcsymbol >
; CHECK-LIS-NEXT: JMP_1 %bb.3
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll
index 953548cc807a8..9d9b4688c2473 100644
--- a/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll
@@ -20,9 +20,8 @@ next:
; Address calcs should be duplicated into this block. One dbg.value should be
; updated, and the other should not.
; CHECK-LABEL: next:
-; CHECK: %[[CASTVAR:[0-9a-zA-Z]+]] = bitcast ptr %p to ptr
; CHECK-NEXT: #dbg_assign(ptr %arith, ![[DIVAR:[0-9]+]],
-; CHECK-NEXT: %[[GEPVAR:[0-9a-zA-Z]+]] = getelementptr i8, ptr %[[CASTVAR]], i64 3
+; CHECK-NEXT: %[[GEPVAR:[0-9a-zA-Z]+]] = getelementptr i8, ptr %p, i64 3
; CHECK-NEXT: %loaded = load i8, ptr %[[GEPVAR]]
; CHECK-NEXT: #dbg_assign(ptr %[[GEPVAR]], ![[DIVAR]],
call void @llvm.dbg.assign(metadata ptr %arith, metadata !12, metadata !DIExpression(), metadata !21, metadata ptr undef, metadata !DIExpression()), !dbg !14
diff --git a/llvm/test/DebugInfo/X86/bitcast-di.ll b/llvm/test/DebugInfo/X86/bitcast-di.ll
index f599cc9ca7db3..a1b5ea431a759 100644
--- a/llvm/test/DebugInfo/X86/bitcast-di.ll
+++ b/llvm/test/DebugInfo/X86/bitcast-di.ll
@@ -5,44 +5,13 @@ target triple = "x86_64-unknown-linux-gnu"
@x = external global [1 x [2 x <4 x float>]]
-declare void @foo(i32)
-
-declare void @slowpath(i32, ptr)
-
-; Is DI maintained after sinking bitcast?
-define void @test(i1 %cond, ptr %base) {
-; CHECK-LABEL: @test
-entry:
- %addr = getelementptr inbounds i64, ptr %base, i64 5
- %casted = bitcast ptr %addr to ptr
- br i1 %cond, label %if.then, label %fallthrough
-
-if.then:
-; CHECK-LABEL: if.then:
-; CHECK: bitcast ptr %addr to ptr, !dbg ![[castLoc:[0-9]+]]
- %v1 = load i32, ptr %casted, align 4
- call void @foo(i32 %v1)
- %cmp = icmp eq i32 %v1, 0
- br i1 %cmp, label %rare.1, label %fallthrough
-
-fallthrough:
- ret void
-
-rare.1:
-; CHECK-LABEL: rare.1:
-; CHECK: bitcast ptr %addr to ptr, !dbg ![[castLoc]]
- call void @slowpath(i32 %v1, ptr %casted) ;; NOT COLD
- br label %fallthrough
-}
-
 ; Is DI maintained when a GEP with all zero indices gets converted to bitcast?
define void @test2() {
; CHECK-LABEL: @test2
load.i145:
-; CHECK: bitcast ptr @x to ptr, !dbg ![[castLoc2:[0-9]+]]
+; CHECK: bitcast ptr @x to ptr, !dbg ![[castLoc:[0-9]+]]
%x_offset = getelementptr [1 x [2 x <4 x float>]], ptr @x, i32 0, i64 0
ret void
}
-; CHECK: ![[castLoc]] = !DILocation(line: 2
-; CHECK: ![[castLoc2]] = !DILocation(line: 11
\ No newline at end of file
+; CHECK: ![[castLoc]] = !DILocation(line: 1
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll
index dbd5e87f2c28d..bdd298a9ba786 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll
@@ -10,9 +10,8 @@ declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32 immarg, <4 x i
define <4 x float> @foo(ptr %p) {
; CHECK-LABEL: define <4 x float> @foo
; CHECK-SAME: (ptr [[P:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[P]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], <4 x i32> zeroinitializer
-; CHECK-NEXT: [[GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP2]], i32 0, <4 x i1> zeroinitializer, <4 x float> zeroinitializer)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], <4 x i32> zeroinitializer
+; CHECK-NEXT: [[GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP1]], i32 0, <4 x i1> zeroinitializer, <4 x float> zeroinitializer)
; CHECK-NEXT: ret <4 x float> [[GATHER]]
;
%base.splatinsert = insertelement <4 x ptr> poison, ptr %p, i32 0
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll
index d0d87b38e0589..73f6aeeb9ad20 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll
@@ -18,11 +18,9 @@ define void @addr_from_invoke() personality ptr null {
; CHECK-NEXT: [[PAD:%.*]] = cleanuppad within none []
; CHECK-NEXT: cleanupret from [[PAD]] unwind to caller
; CHECK: [[BODY_1]]:
-; CHECK-NEXT: [[GEP1:%.*]] = bitcast ptr [[PTR]] to ptr
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GEP1]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[PTR]] to ptr
-; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP0]], align 4
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4
+; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -51,8 +49,7 @@ define void @addr_from_arg(ptr %ptr, i1 %p) {
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
; CHECK: [[BODY_1]]:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[PTR]] to ptr
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP0]], align 4
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4
; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
index 63fd1845594c6..20ce7e7704aed 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
@@ -898,30 +898,45 @@ fallthrough:
; Different types but null is the first?
define i32 @test19(i1 %cond1, i1 %cond2, ptr %b2, ptr %b1) {
-; CHECK-LABEL: define i32 @test19(
-; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[B2:%.*]], ptr [[B1:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5
-; CHECK-NEXT: br i1 [[COND1]], label %[[IF_THEN1:.*]], label %[[IF_THEN2:.*]]
-; CHECK: [[IF_THEN1]]:
-; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i8, ptr [[B1]], i64 40
-; CHECK-NEXT: [[BC2:%.*]] = bitcast ptr [[G2]] to ptr
-; CHECK-NEXT: br label %[[FALLTHROUGH:.*]]
-; CHECK: [[IF_THEN2]]:
-; CHECK-NEXT: [[BC1_1:%.*]] = bitcast ptr [[G1]] to ptr
-; CHECK-NEXT: br i1 [[COND2]], label %[[FALLTHROUGH]], label %[[IF_THEN3:.*]]
-; CHECK: [[IF_THEN3]]:
-; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i64, ptr null, i64 5
-; CHECK-NEXT: [[BC1_2:%.*]] = bitcast ptr [[G3]] to ptr
-; CHECK-NEXT: br label %[[FALLTHROUGH]]
-; CHECK: [[FALLTHROUGH]]:
-; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[BC2]], %[[IF_THEN1]] ], [ [[BC1_1]], %[[IF_THEN2]] ], [ [[BC1_2]], %[[IF_THEN3]] ]
-; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[C]], align 4
-; CHECK-NEXT: [[G1_1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5
-; CHECK-NEXT: [[BC1_1_1:%.*]] = bitcast ptr [[G1_1]] to ptr
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[BC1_1_1]], align 4
-; CHECK-NEXT: [[V:%.*]] = add i32 [[V1]], [[V2]]
-; CHECK-NEXT: ret i32 [[V]]
+; CHECK-YES-LABEL: define i32 @test19(
+; CHECK-YES-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[B2:%.*]], ptr [[B1:%.*]]) {
+; CHECK-YES-NEXT: [[ENTRY:.*:]]
+; CHECK-YES-NEXT: br i1 [[COND1]], label %[[IF_THEN1:.*]], label %[[IF_THEN2:.*]]
+; CHECK-YES: [[IF_THEN1]]:
+; CHECK-YES-NEXT: br label %[[FALLTHROUGH:.*]]
+; CHECK-YES: [[IF_THEN2]]:
+; CHECK-YES-NEXT: br i1 [[COND2]], label %[[FALLTHROUGH]], label %[[IF_THEN3:.*]]
+; CHECK-YES: [[IF_THEN3]]:
+; CHECK-YES-NEXT: br label %[[FALLTHROUGH]]
+; CHECK-YES: [[FALLTHROUGH]]:
+; CHECK-YES-NEXT: [[SUNK_PHI:%.*]] = phi ptr [ null, %[[IF_THEN3]] ], [ [[B2]], %[[IF_THEN2]] ], [ [[B1]], %[[IF_THEN1]] ]
+; CHECK-YES-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[SUNK_PHI]], i64 40
+; CHECK-YES-NEXT: [[V1:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-YES-NEXT: [[G1_1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5
+; CHECK-YES-NEXT: [[V2:%.*]] = load i32, ptr [[G1_1]], align 4
+; CHECK-YES-NEXT: [[V:%.*]] = add i32 [[V1]], [[V2]]
+; CHECK-YES-NEXT: ret i32 [[V]]
+;
+; CHECK-NO-LABEL: define i32 @test19(
+; CHECK-NO-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[B2:%.*]], ptr [[B1:%.*]]) {
+; CHECK-NO-NEXT: [[ENTRY:.*:]]
+; CHECK-NO-NEXT: [[G1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5
+; CHECK-NO-NEXT: br i1 [[COND1]], label %[[IF_THEN1:.*]], label %[[IF_THEN2:.*]]
+; CHECK-NO: [[IF_THEN1]]:
+; CHECK-NO-NEXT: [[G2:%.*]] = getelementptr inbounds i8, ptr [[B1]], i64 40
+; CHECK-NO-NEXT: br label %[[FALLTHROUGH:.*]]
+; CHECK-NO: [[IF_THEN2]]:
+; CHECK-NO-NEXT: br i1 [[COND2]], label %[[FALLTHROUGH]], label %[[IF_THEN3:.*]]
+; CHECK-NO: [[IF_THEN3]]:
+; CHECK-NO-NEXT: [[G3:%.*]] = getelementptr inbounds i64, ptr null, i64 5
+; CHECK-NO-NEXT: br label %[[FALLTHROUGH]]
+; CHECK-NO: [[FALLTHROUGH]]:
+; CHECK-NO-NEXT: [[C:%.*]] = phi ptr [ [[G2]], %[[IF_THEN1]] ], [ [[G1]], %[[IF_THEN2]] ], [ [[G3]], %[[IF_THEN3]] ]
+; CHECK-NO-NEXT: [[V1:%.*]] = load i32, ptr [[C]], align 4
+; CHECK-NO-NEXT: [[G1_1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5
+; CHECK-NO-NEXT: [[V2:%.*]] = load i32, ptr [[G1_1]], align 4
+; CHECK-NO-NEXT: [[V:%.*]] = add i32 [[V1]], [[V2]]
+; CHECK-NO-NEXT: ret i32 [[V]]
;
entry:
%g1 = getelementptr inbounds i64, ptr %b2, i64 5
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll
index 2945a007bf578..819b7c571efcb 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll
@@ -11,8 +11,7 @@ define void @test() {
; CHECK-NEXT: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK-NEXT: [[VAL1:%.*]] = phi ptr [ [[TMP0]], %[[ENTRY]] ], [ [[VAL1]], %[[START]] ]
-; CHECK-NEXT: [[VAL2:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[VAL2]], %[[START]] ]
-; CHECK-NEXT: [[SUNKADDR2:%.*]] = bitcast ptr [[VAL2]] to ptr
+; CHECK-NEXT: [[SUNKADDR2:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[SUNKADDR2]], %[[START]] ]
; CHECK-NEXT: [[LOADX:%.*]] = load i64, ptr [[SUNKADDR2]], align 8
; CHECK-NEXT: br label %[[START]]
;