[llvm] 1578c67 - [NFC][SROA] Variably-indexed load: add test variation w/ upper half of alloca being zeros

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 23 09:17:02 PST 2022


Author: Roman Lebedev
Date: 2022-12-23T20:16:41+03:00
New Revision: 1578c670ff3c0058ac6043e55e40d37d260937bd

URL: https://github.com/llvm/llvm-project/commit/1578c670ff3c0058ac6043e55e40d37d260937bd
DIFF: https://github.com/llvm/llvm-project/commit/1578c670ff3c0058ac6043e55e40d37d260937bd.diff

LOG: [NFC][SROA] Variably-indexed load: add test variation w/ upper half of alloca being zeros

This is the actual pattern I'm looking at.

Added: 
    llvm/test/Transforms/SROA/widen-load-of-small-alloca-special-cases.ll
    llvm/test/Transforms/SROA/widen-load-of-small-alloca-with-zero-upper-half.ll

Modified: 
    llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SROA/widen-load-of-small-alloca-special-cases.ll b/llvm/test/Transforms/SROA/widen-load-of-small-alloca-special-cases.ll
new file mode 100644
index 0000000000000..a8b9bcbca1cf3
--- /dev/null
+++ b/llvm/test/Transforms/SROA/widen-load-of-small-alloca-special-cases.ll
@@ -0,0 +1,321 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
+
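+;; Special cases: variably-indexed accesses with a non-byte (i16) GEP step, volatile accesses, stores, chained or selected GEPs, pointer/float chunks, and repeated loads.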
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; Variably-indexed <2 x i8> load from an 8-byte alloca filled from %src; the assertions above expect the alloca and the load to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff ; i16 element type, so %byteOff counts 2-byte elements rather than bytes
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_volatile_2byte_chunk_of_8byte_alloca_with_2byte_step(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_volatile_2byte_chunk_of_8byte_alloca_with_2byte_step(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load volatile <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; Volatile variant: the variably-indexed <2 x i8> load from the 8-byte alloca is volatile; the assertions above expect it to remain, still volatile.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff ; i16 element type, so %byteOff counts 2-byte elements rather than bytes
+  %chunk = load volatile <2 x i8>, ptr %intermediate.off.addr, align 1 ; the volatile access under test
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @store_2byte_chunk_of_8byte_alloca_with_2byte_step(ptr %src, i64 %byteOff, <2 x i8> %reinit, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @store_2byte_chunk_of_8byte_alloca_with_2byte_step(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    store <2 x i8> [[REINIT:%.*]], ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    [[FINAL:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[FINAL]], ptr [[DST:%.*]], align 8
+; CHECK-ALL-NEXT:    ret void
+; Variably-indexed <2 x i8> store into an 8-byte alloca, followed by a full 8-byte reload; the assertions above expect the alloca and all accesses to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff ; i16 element type, so %byteOff counts 2-byte elements rather than bytes
+  store <2 x i8> %reinit, ptr %intermediate.off.addr, align 1 ; overwrites the 2-byte element selected at run time
+  %final = load <8 x i8>, ptr %intermediate, align 1 ; reads back the whole alloca after the partial overwrite
+  store <8 x i8> %final, ptr %dst
+  ret void
+}
+
+define void @store_volatile_2byte_chunk_of_8byte_alloca_with_2byte_step(ptr %src, i64 %byteOff, <2 x i8> %reinit, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @store_volatile_2byte_chunk_of_8byte_alloca_with_2byte_step(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    store volatile <2 x i8> [[REINIT:%.*]], ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    [[FINAL:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[FINAL]], ptr [[DST:%.*]], align 8
+; CHECK-ALL-NEXT:    ret void
+; Volatile variant: the variably-indexed <2 x i8> store into the 8-byte alloca is volatile; the assertions above expect it to remain, still volatile.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff ; i16 element type, so %byteOff counts 2-byte elements rather than bytes
+  store volatile <2 x i8> %reinit, ptr %intermediate.off.addr, align 1 ; the volatile access under test
+  %final = load <8 x i8>, ptr %intermediate, align 1 ; reads back the whole alloca after the partial overwrite
+  store <8 x i8> %final, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step_with_constant_offset_beforehand(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step_with_constant_offset_beforehand(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_CST:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_CST]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; <2 x i8> load whose address is a constant GEP followed by a variable GEP; the assertions above expect the alloca, both GEPs and the load to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr.cst = getelementptr inbounds i16, ptr %intermediate, i64 1 ; fixed offset of one i16 element (2 bytes)
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.cst, i64 %byteOff ; variable index (in i16 elements) applied on top of the fixed offset
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step_with_constant_offset_afterwards(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step_with_constant_offset_afterwards(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_VARIABLE:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_VARIABLE]], i64 1
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; <2 x i8> load whose address is a variable GEP followed by a constant GEP; the assertions above expect the alloca, both GEPs and the load to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr.variable = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff ; variable index, in i16 (2-byte) elements
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.variable, i64 1 ; fixed offset of one i16 element added after the variable index
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step_with_variable_offset_inbetween_constant_offsets(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step_with_variable_offset_inbetween_constant_offsets(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_CST:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_VARIABLE:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_CST]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_VARIABLE]], i64 1
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; <2 x i8> load whose address is a constant GEP, then a variable GEP, then another constant GEP; the assertions above expect the whole chain and the alloca to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr.cst = getelementptr inbounds i16, ptr %intermediate, i64 1 ; first fixed offset: one i16 element (2 bytes)
+  %intermediate.off.addr.variable = getelementptr inbounds i16, ptr %intermediate.off.addr.cst, i64 %byteOff ; variable index, in i16 elements, sandwiched between the two constants
+  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.variable, i64 1 ; second fixed offset: one more i16 element
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step_select_of_variable_geps(ptr %src, i64 %byteOff0, i64 %byteOff1, i1 %cond, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step_select_of_variable_geps(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF0:%.*]]
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF1:%.*]]
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; <2 x i8> load through a select between two variably-indexed GEPs into the same 8-byte alloca; the assertions above expect the alloca, GEPs, select and load to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff0 ; first candidate address, index in i16 elements
+  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff1 ; second candidate address, index in i16 elements
+  %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1 ; %cond picks which variably-indexed address is loaded from
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step_select_of_variable_and_const_geps(ptr %src, i64 %byteOff0, i64 %byteOff1, i1 %cond, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step_select_of_variable_and_const_geps(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF1:%.*]]
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; <2 x i8> load through a select between a constant-index GEP and a variably-indexed GEP; the assertions above expect the alloca, GEPs, select and load to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 1 ; constant index; the %byteOff0 parameter is unused in this function
+  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff1 ; variable index, in i16 elements
+  %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1 ; %cond picks the constant or the variable address
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_2byte_step_variable_gep_of_select_of_const_geps(ptr %src, i64 %byteOff, i1 %cond, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_2byte_step_variable_gep_of_select_of_const_geps(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 0
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 2
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]]
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_VAR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR_VAR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; <2 x i8> load through a variable GEP whose base is a select between two constant-index GEPs; the assertions above expect the whole address computation and the alloca to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 0 ; constant index 0: the start of the alloca
+  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 2 ; constant index 2: two i16 elements, i.e. 4 bytes in
+  %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1 ; %cond picks one of the two constant bases
+  %intermediate.off.addr.var = getelementptr inbounds i16, ptr %intermediate.off.addr, i64 %byteOff ; variable index (in i16 elements) applied to the selected base
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr.var, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_ptr_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_ptr_chunk_of_16byte_alloca(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x ptr>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x ptr> [[CHUNK]], ptr [[DST:%.*]], align 8
+; CHECK-ALL-NEXT:    ret void
+; Variably-indexed load of a <1 x ptr> chunk from a 16-byte alloca filled with bytes from %src; the assertions above expect the alloca and the load to remain.
+  %intermediate = alloca [16 x i8], align 64
+  %init = load <16 x i8>, ptr %src, align 1
+  store <16 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset here
+  %chunk = load <1 x ptr>, ptr %intermediate.off.addr, align 1 ; chunk is a single-element pointer vector, not an integer vector
+  store <1 x ptr> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_float_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_float_chunk_of_16byte_alloca(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x float>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x float> [[CHUNK]], ptr [[DST:%.*]], align 4
+; CHECK-ALL-NEXT:    ret void
+; Variably-indexed load of a <1 x float> chunk from a 16-byte alloca filled with bytes from %src; the assertions above expect the alloca and the load to remain.
+  %intermediate = alloca [16 x i8], align 64
+  %init = load <16 x i8>, ptr %src, align 1
+  store <16 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset here
+  %chunk = load <1 x float>, ptr %intermediate.off.addr, align 1 ; chunk is a single-element float vector, not an integer vector
+  store <1 x float> %chunk, ptr %dst
+  ret void
+}
+
+define void @two_loads_of_same_2byte_chunks_of_8byte_alloca_with_2byte_step_variable_gep(ptr %src, i64 %byteOff, ptr %dst0, ptr %dst1) nounwind {
+; CHECK-ALL-LABEL: @two_loads_of_same_2byte_chunks_of_8byte_alloca_with_2byte_step_variable_gep(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK0:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK0]], ptr [[DST0:%.*]], align 2
+; CHECK-ALL-NEXT:    [[CHUNK1:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK1]], ptr [[DST1:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; Two <2 x i8> loads through one shared variably-indexed GEP into an 8-byte alloca; the assertions above expect the alloca, the GEP and both loads to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff ; i16 element type, so %byteOff counts 2-byte elements rather than bytes
+  %chunk0 = load <2 x i8>, ptr %intermediate.off, align 1
+  store <2 x i8> %chunk0, ptr %dst0
+  %chunk1 = load <2 x i8>, ptr %intermediate.off, align 1 ; second load reuses the same GEP as the first
+  store <2 x i8> %chunk1, ptr %dst1
+  ret void
+}
+
+define void @two_loads_of_two_2byte_chunks_of_8byte_alloca_with_2byte_step_variable_geps(ptr %src, i64 %byteOff0, i64 %byteOff1, ptr %dst0, ptr %dst1) nounwind {
+; CHECK-ALL-LABEL: @two_loads_of_two_2byte_chunks_of_8byte_alloca_with_2byte_step_variable_geps(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF0:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK0:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF0]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK0]], ptr [[DST0:%.*]], align 2
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF0]], i64 [[BYTEOFF1:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK1:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF1]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK1]], ptr [[DST1:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; Two <2 x i8> loads through two different variably-indexed GEPs into an 8-byte alloca; the assertions above expect the alloca, both GEPs and both loads to remain.
+  %intermediate = alloca [8 x i8], align 64
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64
+  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff0 ; first address: variable index (in i16 elements) from the alloca
+  %chunk0 = load <2 x i8>, ptr %intermediate.off0, align 1
+  store <2 x i8> %chunk0, ptr %dst0
+  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate.off0, i64 %byteOff1 ; note: based on %intermediate.off0, so this index is relative to the first GEP, not the alloca
+  %chunk1 = load <2 x i8>, ptr %intermediate.off1, align 1
+  store <2 x i8> %chunk1, ptr %dst1
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BE-32: {{.*}}
+; CHECK-BE-64: {{.*}}
+; CHECK-LE-32: {{.*}}
+; CHECK-LE-64: {{.*}}
+; CHECK-SCALAR: {{.*}}
+; CHECK-SCALAR-32: {{.*}}
+; CHECK-SCALAR-64: {{.*}}

diff  --git a/llvm/test/Transforms/SROA/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/Transforms/SROA/widen-load-of-small-alloca-with-zero-upper-half.ll
new file mode 100644
index 0000000000000..00e633858fcf0
--- /dev/null
+++ b/llvm/test/Transforms/SROA/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -0,0 +1,458 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
+; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
+; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
+
+define void @load_1byte_chunk_of_2byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_2byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64
+; CHECK-ALL-NEXT:    store <2 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <1 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
+; CHECK-ALL-NEXT:    ret void
+; 2-byte alloca is zeroed, then its first byte is overwritten from %src (second byte stays zero); a 1-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [2 x i8], align 64
+  store <2 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <1 x i8>, ptr %src, align 1
+  store <1 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first byte; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
+  store <1 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
+; CHECK-ALL-NEXT:    store <4 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
+; CHECK-ALL-NEXT:    ret void
+; 4-byte alloca is zeroed, then its first 2 bytes are overwritten from %src (last 2 bytes stay zero); a 1-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [4 x i8], align 64
+  store <4 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <2 x i8>, ptr %src, align 1
+  store <2 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first 2 bytes; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
+  store <1 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
+; CHECK-ALL-NEXT:    store <4 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; 4-byte alloca is zeroed, then its first 2 bytes are overwritten from %src (last 2 bytes stay zero); a 2-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [4 x i8], align 64
+  store <4 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <2 x i8>, ptr %src, align 1
+  store <2 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first 2 bytes; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    store <8 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
+; CHECK-ALL-NEXT:    ret void
+; 8-byte alloca is zeroed, then its first 4 bytes are overwritten from %src (last 4 bytes stay zero); a 1-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [8 x i8], align 64
+  store <8 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <4 x i8>, ptr %src, align 1
+  store <4 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first 4 bytes; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
+  store <1 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    store <8 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+; 8-byte alloca is zeroed, then its first 4 bytes are overwritten from %src (last 4 bytes stay zero); a 2-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [8 x i8], align 64
+  store <8 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <4 x i8>, ptr %src, align 1
+  store <4 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first 4 bytes; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
+  store <2 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
+; CHECK-ALL-NEXT:    store <8 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
+; CHECK-ALL-NEXT:    ret void
+; 8-byte alloca is zeroed, then its first 4 bytes are overwritten from %src (last 4 bytes stay zero); a 4-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [8 x i8], align 64
+  store <8 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <4 x i8>, ptr %src, align 1
+  store <4 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first 4 bytes; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
+  store <4 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
+; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
+; CHECK-ALL-NEXT:    ret void
+; 16-byte alloca is zeroed, then its first 8 bytes are overwritten from %src (last 8 bytes stay zero); a 1-byte chunk is loaded at a variable byte offset. The assertions above expect the alloca and every access to remain.
+  %intermediate = alloca [16 x i8], align 64
+  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero the whole alloca first
+  %init = load <8 x i8>, ptr %src, align 1
+  store <8 x i8> %init, ptr %intermediate, align 64 ; overwrite only the first 8 bytes; the rest keeps the zeros
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; i8 element type, so %byteOff is a byte offset
+  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
+  store <1 x i8> %chunk, ptr %dst
+  ret void
+}
+
+define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 2-byte load at run-time offset %byteOff into a 16-byte alloca whose bytes 8..15 stay zero.
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
+; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [16 x i8], align 64 ; 16-byte stack buffer
+  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 16 bytes first
+  %init = load <8 x i8>, ptr %src, align 1 ; fetch 8 bytes from %src
+  store <8 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..7 only; bytes 8..15 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 2-byte load under test
+  store <2 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 2
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 4-byte load at run-time offset %byteOff into a 16-byte alloca whose bytes 8..15 stay zero.
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
+; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [16 x i8], align 64 ; 16-byte stack buffer
+  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 16 bytes first
+  %init = load <8 x i8>, ptr %src, align 1 ; fetch 8 bytes from %src
+  store <8 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..7 only; bytes 8..15 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 4-byte load under test
+  store <4 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 4
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 8-byte load at run-time offset %byteOff into a 16-byte alloca whose bytes 8..15 stay zero.
+; CHECK-ALL-LABEL: @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
+; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [16 x i8], align 64 ; 16-byte stack buffer
+  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 16 bytes first
+  %init = load <8 x i8>, ptr %src, align 1 ; fetch 8 bytes from %src
+  store <8 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..7 only; bytes 8..15 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 8-byte load under test
+  store <8 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 8
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 1-byte load at run-time offset %byteOff into a 32-byte alloca whose bytes 16..31 stay zero.
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
+; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [32 x i8], align 64 ; 32-byte stack buffer
+  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 32 bytes first
+  %init = load <16 x i8>, ptr %src, align 1 ; fetch 16 bytes from %src
+  store <16 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..15 only; bytes 16..31 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 1-byte load under test
+  store <1 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 1
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 2-byte load at run-time offset %byteOff into a 32-byte alloca whose bytes 16..31 stay zero.
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
+; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [32 x i8], align 64 ; 32-byte stack buffer
+  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 32 bytes first
+  %init = load <16 x i8>, ptr %src, align 1 ; fetch 16 bytes from %src
+  store <16 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..15 only; bytes 16..31 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 2-byte load under test
+  store <2 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 2
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 4-byte load at run-time offset %byteOff into a 32-byte alloca whose bytes 16..31 stay zero.
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
+; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [32 x i8], align 64 ; 32-byte stack buffer
+  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 32 bytes first
+  %init = load <16 x i8>, ptr %src, align 1 ; fetch 16 bytes from %src
+  store <16 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..15 only; bytes 16..31 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 4-byte load under test
+  store <4 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 4
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 8-byte load at run-time offset %byteOff into a 32-byte alloca whose bytes 16..31 stay zero.
+; CHECK-ALL-LABEL: @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
+; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [32 x i8], align 64 ; 32-byte stack buffer
+  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 32 bytes first
+  %init = load <16 x i8>, ptr %src, align 1 ; fetch 16 bytes from %src
+  store <16 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..15 only; bytes 16..31 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 8-byte load under test
+  store <8 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 8
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 16-byte load at run-time offset %byteOff into a 32-byte alloca whose bytes 16..31 stay zero.
+; CHECK-ALL-LABEL: @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
+; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[CHUNK]], ptr [[DST:%.*]], align 16
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [32 x i8], align 64 ; 32-byte stack buffer
+  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 32 bytes first
+  %init = load <16 x i8>, ptr %src, align 1 ; fetch 16 bytes from %src
+  store <16 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..15 only; bytes 16..31 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 16-byte load under test
+  store <16 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 16
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 1-byte load at run-time offset %byteOff into a 64-byte alloca whose bytes 32..63 stay zero.
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
+; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [64 x i8], align 64 ; 64-byte stack buffer
+  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 64 bytes first
+  %init = load <32 x i8>, ptr %src, align 1 ; fetch 32 bytes from %src
+  store <32 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..31 only; bytes 32..63 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 1-byte load under test
+  store <1 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 1
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 2-byte load at run-time offset %byteOff into a 64-byte alloca whose bytes 32..63 stay zero.
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
+; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [64 x i8], align 64 ; 64-byte stack buffer
+  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 64 bytes first
+  %init = load <32 x i8>, ptr %src, align 1 ; fetch 32 bytes from %src
+  store <32 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..31 only; bytes 32..63 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 2-byte load under test
+  store <2 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 2
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 4-byte load at run-time offset %byteOff into a 64-byte alloca whose bytes 32..63 stay zero.
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
+; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [64 x i8], align 64 ; 64-byte stack buffer
+  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 64 bytes first
+  %init = load <32 x i8>, ptr %src, align 1 ; fetch 32 bytes from %src
+  store <32 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..31 only; bytes 32..63 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 4-byte load under test
+  store <4 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 4
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 8-byte load at run-time offset %byteOff into a 64-byte alloca whose bytes 32..63 stay zero.
+; CHECK-ALL-LABEL: @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
+; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [64 x i8], align 64 ; 64-byte stack buffer
+  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 64 bytes first
+  %init = load <32 x i8>, ptr %src, align 1 ; fetch 32 bytes from %src
+  store <32 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..31 only; bytes 32..63 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 8-byte load under test
+  store <8 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 8
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 16-byte load at run-time offset %byteOff into a 64-byte alloca whose bytes 32..63 stay zero.
+; CHECK-ALL-LABEL: @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
+; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <16 x i8> [[CHUNK]], ptr [[DST:%.*]], align 16
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [64 x i8], align 64 ; 64-byte stack buffer
+  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 64 bytes first
+  %init = load <32 x i8>, ptr %src, align 1 ; fetch 32 bytes from %src
+  store <32 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..31 only; bytes 32..63 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 16-byte load under test
+  store <16 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 16
+  ret void ; checks above expect this same instruction sequence in the output
+}
+
+define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind { ; Test input: 32-byte load at run-time offset %byteOff into a 64-byte alloca whose bytes 32..63 stay zero.
+; CHECK-ALL-LABEL: @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(
+; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
+; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
+; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
+; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <32 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
+; CHECK-ALL-NEXT:    store <32 x i8> [[CHUNK]], ptr [[DST:%.*]], align 32
+; CHECK-ALL-NEXT:    ret void
+;
+  %intermediate = alloca [64 x i8], align 64 ; 64-byte stack buffer
+  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64 ; zero all 64 bytes first
+  %init = load <32 x i8>, ptr %src, align 1 ; fetch 32 bytes from %src
+  store <32 x i8> %init, ptr %intermediate, align 64 ; overwrite bytes 0..31 only; bytes 32..63 remain zero
+  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff ; address at run-time byte offset %byteOff
+  %chunk = load <32 x i8>, ptr %intermediate.off.addr, align 1 ; the variably-indexed 32-byte load under test
+  store <32 x i8> %chunk, ptr %dst ; no explicit alignment here; the expected output above prints align 32
+  ret void ; checks above expect this same instruction sequence in the output
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BE-32: {{.*}}
+; CHECK-BE-64: {{.*}}
+; CHECK-LE-32: {{.*}}
+; CHECK-LE-64: {{.*}}
+; CHECK-SCALAR: {{.*}}
+; CHECK-SCALAR-32: {{.*}}
+; CHECK-SCALAR-64: {{.*}}

diff  --git a/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll b/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll
index 916bb6555fb17..b4a99606a8b75 100644
--- a/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll
+++ b/llvm/test/Transforms/SROA/widen-load-of-small-alloca.ll
@@ -8,8 +8,8 @@
 ; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
 ; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
 
-define void @load-1byte-chunk-of-1byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-1byte-chunk-of-1byte-alloca(
+define void @load_1byte_chunk_of_1byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_1byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [1 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <1 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <1 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -27,8 +27,8 @@ define void @load-1byte-chunk-of-1byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-1byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-1byte-chunk-of-2byte-alloca(
+define void @load_1byte_chunk_of_2byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_2byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -46,8 +46,8 @@ define void @load-1byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-2byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-2byte-alloca(
+define void @load_2byte_chunk_of_2byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_2byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -65,8 +65,8 @@ define void @load-2byte-chunk-of-2byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-1byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-1byte-chunk-of-4byte-alloca(
+define void @load_1byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_4byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -84,8 +84,8 @@ define void @load-1byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-2byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-4byte-alloca(
+define void @load_2byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_4byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -103,8 +103,8 @@ define void @load-2byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-4byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-4byte-chunk-of-4byte-alloca(
+define void @load_4byte_chunk_of_4byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_4byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -122,8 +122,8 @@ define void @load-4byte-chunk-of-4byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-1byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-1byte-chunk-of-8byte-alloca(
+define void @load_1byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_8byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -141,8 +141,8 @@ define void @load-1byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-2byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca(
+define void @load_2byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -160,8 +160,8 @@ define void @load-2byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-4byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-4byte-chunk-of-8byte-alloca(
+define void @load_4byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_8byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -179,8 +179,8 @@ define void @load-4byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-8byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-8byte-chunk-of-8byte-alloca(
+define void @load_8byte_chunk_of_8byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_8byte_chunk_of_8byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -198,8 +198,8 @@ define void @load-8byte-chunk-of-8byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-1byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-1byte-chunk-of-16byte-alloca(
+define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_16byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -217,8 +217,8 @@ define void @load-1byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-2byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-16byte-alloca(
+define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_16byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -236,8 +236,8 @@ define void @load-2byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-4byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-4byte-chunk-of-16byte-alloca(
+define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_16byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -255,8 +255,8 @@ define void @load-4byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-8byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-8byte-chunk-of-16byte-alloca(
+define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_8byte_chunk_of_16byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -274,8 +274,8 @@ define void @load-8byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-16byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-16byte-chunk-of-16byte-alloca(
+define void @load_16byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_16byte_chunk_of_16byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -293,8 +293,8 @@ define void @load-16byte-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst
   ret void
 }
 
-define void @load-1byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-1byte-chunk-of-32byte-alloca(
+define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_1byte_chunk_of_32byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -312,8 +312,8 @@ define void @load-1byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-2byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-32byte-alloca(
+define void @load_2byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_2byte_chunk_of_32byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -331,8 +331,8 @@ define void @load-2byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-4byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-4byte-chunk-of-32byte-alloca(
+define void @load_4byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_4byte_chunk_of_32byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -350,8 +350,8 @@ define void @load-4byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-8byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-8byte-chunk-of-32byte-alloca(
+define void @load_8byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_8byte_chunk_of_32byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -369,8 +369,8 @@ define void @load-8byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst)
   ret void
 }
 
-define void @load-16byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-16byte-chunk-of-32byte-alloca(
+define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_16byte_chunk_of_32byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -388,8 +388,8 @@ define void @load-16byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst
   ret void
 }
 
-define void @load-32byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-32byte-chunk-of-32byte-alloca(
+define void @load_32byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
+; CHECK-ALL-LABEL: @load_32byte_chunk_of_32byte_alloca(
 ; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
 ; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
 ; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
@@ -407,309 +407,6 @@ define void @load-32byte-chunk-of-32byte-alloca(ptr %src, i64 %byteOff, ptr %dst
   ret void
 }
 
-;; Special test
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-volatile-2byte-chunk-of-8byte-alloca-with-2byte-step(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-volatile-2byte-chunk-of-8byte-alloca-with-2byte-step(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load volatile <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff
-  %chunk = load volatile <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @store-2byte-chunk-of-8byte-alloca-with-2byte-step(ptr %src, i64 %byteOff, <2 x i8> %reinit, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @store-2byte-chunk-of-8byte-alloca-with-2byte-step(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    store <2 x i8> [[REINIT:%.*]], ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    [[FINAL:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[FINAL]], ptr [[DST:%.*]], align 8
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff
-  store <2 x i8> %reinit, ptr %intermediate.off.addr, align 1
-  %final = load <8 x i8>, ptr %intermediate, align 1
-  store <8 x i8> %final, ptr %dst
-  ret void
-}
-
-define void @store-volatile-2byte-chunk-of-8byte-alloca-with-2byte-step(ptr %src, i64 %byteOff, <2 x i8> %reinit, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @store-volatile-2byte-chunk-of-8byte-alloca-with-2byte-step(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    store volatile <2 x i8> [[REINIT:%.*]], ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    [[FINAL:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[FINAL]], ptr [[DST:%.*]], align 8
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff
-  store volatile <2 x i8> %reinit, ptr %intermediate.off.addr, align 1
-  %final = load <8 x i8>, ptr %intermediate, align 1
-  store <8 x i8> %final, ptr %dst
-  ret void
-}
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-beforehand(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-beforehand(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_CST:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_CST]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr.cst = getelementptr inbounds i16, ptr %intermediate, i64 1
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.cst, i64 %byteOff
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-afterwards(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-constant-offset-afterwards(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_VARIABLE:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_VARIABLE]], i64 1
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr.variable = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.variable, i64 1
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-variable-offset-inbetween-constant-offsets(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-with-variable-offset-inbetween-constant-offsets(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_CST:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_VARIABLE:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_CST]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR_VARIABLE]], i64 1
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr.cst = getelementptr inbounds i16, ptr %intermediate, i64 1
-  %intermediate.off.addr.variable = getelementptr inbounds i16, ptr %intermediate.off.addr.cst, i64 %byteOff
-  %intermediate.off.addr = getelementptr inbounds i16, ptr %intermediate.off.addr.variable, i64 1
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-geps(ptr %src, i64 %byteOff0, i64 %byteOff1, i1 %cond, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-geps(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF0:%.*]]
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF1:%.*]]
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff0
-  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff1
-  %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-and-const-geps(ptr %src, i64 %byteOff0, i64 %byteOff1, i1 %cond, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-select-of-variable-and-const-geps(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 1
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF1:%.*]]
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 1
-  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff1
-  %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-2byte-chunk-of-8byte-alloca-with-2byte-step-variable-gep-of-select-of-const-geps(ptr %src, i64 %byteOff, i1 %cond, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-2byte-chunk-of-8byte-alloca-with-2byte-step-variable-gep-of-select-of-const-geps(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 0
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 2
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[INTERMEDIATE_OFF0]], ptr [[INTERMEDIATE_OFF1]]
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR_VAR:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF_ADDR]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR_VAR]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 0
-  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate, i64 2
-  %intermediate.off.addr = select i1 %cond, ptr %intermediate.off0, ptr %intermediate.off1
-  %intermediate.off.addr.var = getelementptr inbounds i16, ptr %intermediate.off.addr, i64 %byteOff
-  %chunk = load <2 x i8>, ptr %intermediate.off.addr.var, align 1
-  store <2 x i8> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-ptr-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-ptr-chunk-of-16byte-alloca(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x ptr>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <1 x ptr> [[CHUNK]], ptr [[DST:%.*]], align 8
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [16 x i8], align 64
-  %init = load <16 x i8>, ptr %src, align 1
-  store <16 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
-  %chunk = load <1 x ptr>, ptr %intermediate.off.addr, align 1
-  store <1 x ptr> %chunk, ptr %dst
-  ret void
-}
-
-define void @load-float-chunk-of-16byte-alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; CHECK-ALL-LABEL: @load-float-chunk-of-16byte-alloca(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x float>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
-; CHECK-ALL-NEXT:    store <1 x float> [[CHUNK]], ptr [[DST:%.*]], align 4
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [16 x i8], align 64
-  %init = load <16 x i8>, ptr %src, align 1
-  store <16 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
-  %chunk = load <1 x float>, ptr %intermediate.off.addr, align 1
-  store <1 x float> %chunk, ptr %dst
-  ret void
-}
-
-define void @two-loads-of-same-2byte-chunks-of-8byte-alloca-with-2byte-step-variable-gep(ptr %src, i64 %byteOff, ptr %dst0, ptr %dst1) nounwind {
-; CHECK-ALL-LABEL: @two-loads-of-same-2byte-chunks-of-8byte-alloca-with-2byte-step-variable-gep(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK0:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK0]], ptr [[DST0:%.*]], align 2
-; CHECK-ALL-NEXT:    [[CHUNK1:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK1]], ptr [[DST1:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff
-  %chunk0 = load <2 x i8>, ptr %intermediate.off, align 1
-  store <2 x i8> %chunk0, ptr %dst0
-  %chunk1 = load <2 x i8>, ptr %intermediate.off, align 1
-  store <2 x i8> %chunk1, ptr %dst1
-  ret void
-}
-
-define void @two-loads-of-two-2byte-chunks-of-8byte-alloca-with-2byte-step-variable-geps(ptr %src, i64 %byteOff0, i64 %byteOff1, ptr %dst0, ptr %dst1) nounwind {
-; CHECK-ALL-LABEL: @two-loads-of-two-2byte-chunks-of-8byte-alloca-with-2byte-step-variable-geps(
-; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
-; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF0:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE]], i64 [[BYTEOFF0:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK0:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF0]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK0]], ptr [[DST0:%.*]], align 2
-; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF1:%.*]] = getelementptr inbounds i16, ptr [[INTERMEDIATE_OFF0]], i64 [[BYTEOFF1:%.*]]
-; CHECK-ALL-NEXT:    [[CHUNK1:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF1]], align 1
-; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK1]], ptr [[DST1:%.*]], align 2
-; CHECK-ALL-NEXT:    ret void
-;
-  %intermediate = alloca [8 x i8], align 64
-  %init = load <8 x i8>, ptr %src, align 1
-  store <8 x i8> %init, ptr %intermediate, align 64
-  %intermediate.off0 = getelementptr inbounds i16, ptr %intermediate, i64 %byteOff0
-  %chunk0 = load <2 x i8>, ptr %intermediate.off0, align 1
-  store <2 x i8> %chunk0, ptr %dst0
-  %intermediate.off1 = getelementptr inbounds i16, ptr %intermediate.off0, i64 %byteOff1
-  %chunk1 = load <2 x i8>, ptr %intermediate.off1, align 1
-  store <2 x i8> %chunk1, ptr %dst1
-  ret void
-}
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-BE-32: {{.*}}
 ; CHECK-BE-64: {{.*}}


        


More information about the llvm-commits mailing list