[llvm] [InstCombine] Scalarize extractelement from single-use vector load (PR #185795)
Jianjian Guan via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 20:04:17 PDT 2026
https://github.com/jacquesguan created https://github.com/llvm/llvm-project/pull/185795
Fold `extractelement (load <N x T>, ptr %p), C` to `load T, ptr (getelementptr inbounds T, %p, C)`. This avoids loading unused vector elements when only one element is used.
>From 77eff7fdc22787b6adbcdf97381041fed030121f Mon Sep 17 00:00:00 2001
From: Jianjian GUAN <jacquesguan at me.com>
Date: Tue, 10 Mar 2026 17:17:57 +0800
Subject: [PATCH] [InstCombine] Scalarize extractelement from single-use vector
load
Fold `extractelement (load <N x T>, ptr %p), C` to `load T, ptr (getelementptr inbounds T, %p, C)`.
This avoids loading unused vector elements when only one element is used.
---
.../InstCombine/InstCombineVectorOps.cpp | 21 ++++
.../Transforms/InstCombine/debuginfo-sink.ll | 87 +++++++++++-----
.../Transforms/InstCombine/extractelement.ll | 99 ++++++++++++++++++-
.../InstCombine/masked_intrinsics.ll | 4 +-
.../InstCombine/scalarization-inseltpoison.ll | 16 +--
.../Transforms/InstCombine/scalarization.ll | 16 +--
.../InstCombine/sink-into-catchswitch.ll | 11 +--
.../multiply-fused-dominance.ll | 72 +++++++++-----
8 files changed, 249 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 3b034f6c37f66..ead55debafa4b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -467,6 +467,27 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
if (auto *Phi = dyn_cast<PHINode>(SrcVec))
if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
return ScalarPHI;
+
+ // extractelt (load <N x T>, ptr %p), C -->
+ // load T, ptr (getelementptr inbounds T, %p, C)
+ // Scalarize a single-use vector load to a scalar load at the element's
+ // address, avoiding loading unused vector elements.
+ if (HasKnownValidIndex) {
+ if (auto *LI = dyn_cast<LoadInst>(SrcVec)) {
+ if (!LI->isVolatile() && !LI->isAtomic() && LI->hasOneUse()) {
+ Type *EltTy = EI.getType();
+ uint64_t Idx = IndexC->getZExtValue();
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_64(
+ EltTy, LI->getPointerOperand(), Idx);
+ const DataLayout &DL = getDataLayout();
+ Align EltAlign = commonAlignment(
+ LI->getAlign(), Idx * DL.getTypeStoreSize(EltTy).getFixedValue());
+ LoadInst *NewLoad = Builder.CreateAlignedLoad(EltTy, NewPtr, EltAlign,
+ LI->getName() + ".elt");
+ return replaceInstUsesWith(EI, NewLoad);
+ }
+ }
+ }
}
// If SrcVec is a subvector starting at index 0, extract from the
diff --git a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll
index c02aefe0723c5..c25729087c37c 100644
--- a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll
+++ b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt %s -passes=instcombine -S | FileCheck %s
; Test sinking of dbg.values when instcombine sinks associated instructions.
@@ -8,22 +9,25 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
; gets folded. The dbg.value should be duplicated in the block its sunk
; into, to maximise liveness.
;
-; CHECK-LABEL: define i32 @foo(ptr
-; CHECK: #dbg_value(ptr %a, !{{[0-9]+}},
-; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value),
-; CHECK-NEXT: br label %sink1
define i32 @foo(ptr %a) !dbg !7 {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: ptr [[A:%.*]]) !dbg [[DBG6:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: #dbg_value(ptr [[A]], [[META11:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), [[META12:![0-9]+]])
+; CHECK-NEXT: br label %[[SINK1:.*]]
+; CHECK: [[SINK1]]:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: #dbg_value(ptr [[GEP]], [[META11]], !DIExpression(), [[META12]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4, !dbg [[META12]]
+; CHECK-NEXT: ret i32 [[TMP0]], !dbg [[META12]]
+;
entry:
%gep = getelementptr i32, ptr %a, i32 1
call void @llvm.dbg.value(metadata ptr %gep, metadata !16, metadata !12), !dbg !15
br label %sink1
sink1:
-; CHECK-LABEL: sink1:
-; CHECK: #dbg_value(ptr %gep,
-; CHECK-SAME: !{{[0-9]+}}, !DIExpression(),
-; CHECK-NEXT: load
%0 = load i32, ptr %gep, align 4, !dbg !15
ret i32 %0, !dbg !15
}
@@ -32,23 +36,30 @@ sink1:
; dbg.value sunk, but an undef dbg.value is left to terminate any earlier
; value range.
-; CHECK-LABEL: define i32 @bar(
-; CHECK: #dbg_value(ptr poison,
-; CHECK-NEXT: br label %sink2
define i32 @bar(ptr %a, i32 %b) !dbg !70 {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: ptr [[A:%.*]], i32 [[B:%.*]]) !dbg [[DBG13:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: #dbg_value(ptr poison, [[META16:![0-9]+]], !DIExpression(), [[META17:![0-9]+]])
+; CHECK-NEXT: br label %[[SINK2:.*]]
+; CHECK: [[SINK2]]:
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 4
+; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[GEP_IDX]]
+; CHECK-NEXT: #dbg_value(ptr [[GEP]], [[META16]], !DIExpression(), [[META17]])
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4
+; CHECK-NEXT: [[DOTELT:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT: ret i32 [[DOTELT]]
+;
entry:
%gep = getelementptr <vscale x 4 x i32>, ptr %a, i32 %b
call void @llvm.dbg.value(metadata ptr %gep, metadata !73, metadata !12), !dbg !74
br label %sink2
sink2:
-; CHECK-LABEL: sink2:
-; CHECK: #dbg_value(ptr %gep,
-; CHECK-SAME: !{{[0-9]+}}, !DIExpression(),
-; CHECK-NEXT: load
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: ret
%0 = load <vscale x 4 x i32>, ptr %gep
%extract = extractelement <vscale x 4 x i32> %0, i32 1
ret i32 %extract
@@ -58,14 +69,20 @@ sink2:
; only the last use is cloned into the sunk block, and that both of the
; original dbg.values are salvaged.
;
-; CHECK-LABEL: define i32 @baz(ptr
-; CHECK: #dbg_value(ptr %a, !{{[0-9]+}},
-; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value),
-; CHECK-NEXT: #dbg_value(ptr %a, !{{[0-9]+}},
-; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 9, DW_OP_stack_value),
-; CHECK-NEXT: br label %sink1
define i32 @baz(ptr %a) !dbg !80 {
+; CHECK-LABEL: define i32 @baz(
+; CHECK-SAME: ptr [[A:%.*]]) !dbg [[DBG18:![0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: #dbg_value(ptr [[A]], [[META19:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), [[META20:![0-9]+]])
+; CHECK-NEXT: #dbg_value(ptr [[A]], [[META19]], !DIExpression(DW_OP_plus_uconst, 9, DW_OP_stack_value), [[META21:![0-9]+]])
+; CHECK-NEXT: br label %[[SINK1:.*]]
+; CHECK: [[SINK1]]:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT: #dbg_value(ptr [[GEP]], [[META19]], !DIExpression(DW_OP_plus_uconst, 5), [[META21]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4, !dbg [[META21]]
+; CHECK-NEXT: ret i32 [[TMP0]], !dbg [[META21]]
+;
entry:
%gep = getelementptr i32, ptr %a, i32 1
call void @llvm.dbg.value(metadata ptr %gep, metadata !83, metadata !12), !dbg !84
@@ -73,10 +90,6 @@ entry:
br label %sink1
sink1:
-; CHECK-LABEL: sink1:
-; CHECK: #dbg_value(ptr %gep,
-; CHECK-SAME: !{{[0-9]+}}, !DIExpression(DW_OP_plus_uconst, 5),
-; CHECK-NEXT: load
%0 = load i32, ptr %gep, align 4, !dbg !85
ret i32 %0, !dbg !85
}
@@ -109,3 +122,23 @@ sink1:
!83 = !DILocalVariable(name: "l", scope: !80, file: !1, line: 2, type: !10)
!84 = !DILocation(line: 5, column: 3, scope: !80)
!85 = !DILocation(line: 6, column: 3, scope: !80)
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug)
+; CHECK: [[META1]] = !DIFile(filename: "{{.*}}a.c", directory: {{.*}})
+; CHECK: [[DBG6]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 2, type: [[META7:![0-9]+]], scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META10:![0-9]+]])
+; CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])
+; CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]]}
+; CHECK: [[META9]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+; CHECK: [[META10]] = !{}
+; CHECK: [[META11]] = !DILocalVariable(name: "h", scope: [[DBG6]], file: [[META1]], line: 4, type: [[META9]])
+; CHECK: [[META12]] = !DILocation(line: 5, column: 3, scope: [[DBG6]])
+; CHECK: [[DBG13]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 2, type: [[META14:![0-9]+]], scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META10]])
+; CHECK: [[META14]] = !DISubroutineType(types: [[META15:![0-9]+]])
+; CHECK: [[META15]] = !{[[META9]], [[META9]], [[META9]]}
+; CHECK: [[META16]] = !DILocalVariable(name: "k", scope: [[DBG13]], file: [[META1]], line: 2, type: [[META9]])
+; CHECK: [[META17]] = !DILocation(line: 5, column: 3, scope: [[DBG13]])
+; CHECK: [[DBG18]] = distinct !DISubprogram(name: "baz", scope: [[META1]], file: [[META1]], line: 2, type: [[META7]], scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META10]])
+; CHECK: [[META19]] = !DILocalVariable(name: "l", scope: [[DBG18]], file: [[META1]], line: 2, type: [[META9]])
+; CHECK: [[META20]] = !DILocation(line: 5, column: 3, scope: [[DBG18]])
+; CHECK: [[META21]] = !DILocation(line: 6, column: 3, scope: [[DBG18]])
+;.
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
index 04a35e19fb0bb..ae6373fe2ce0d 100644
--- a/llvm/test/Transforms/InstCombine/extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -393,7 +393,6 @@ define i4 @bitcast_scalar_legal_type_index3(i64 %x) {
; BE128-NEXT: [[R:%.*]] = extractelement <16 x i4> [[V]], i64 3
; BE128-NEXT: ret i4 [[R]]
;
-
%v = bitcast i64 %x to <16 x i4>
%r = extractelement <16 x i4> %v, i64 3
ret i4 %r
@@ -736,7 +735,6 @@ define i8 @bitcast_scalar_index0_use(i64 %x) {
; ANY-NEXT: [[R:%.*]] = extractelement <8 x i8> [[V]], i64 0
; ANY-NEXT: ret i8 [[R]]
;
-
%v = bitcast i64 %x to <8 x i8>
call void @use(<8 x i8> %v)
%r = extractelement <8 x i8> %v, i64 0
@@ -925,3 +923,100 @@ define float @crash_4b8320(<2 x float> %i1, float %i12) {
%i29 = extractelement <4 x float> %i26, i64 0
ret float %i29
}
+
+; extractelt (load <N x T>, ptr %p), C --> load T, ptr (gep inbounds T, %p, C)
+
+define i32 @load_extract_i32_idx2(ptr %p) {
+; ANY-LABEL: @load_extract_i32_idx2(
+; ANY-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 8
+; ANY-NEXT: [[V_ELT:%.*]] = load i32, ptr [[TMP1]], align 8
+; ANY-NEXT: ret i32 [[V_ELT]]
+;
+ %v = load <4 x i32>, ptr %p, align 16
+ %e = extractelement <4 x i32> %v, i32 2
+ ret i32 %e
+}
+
+define float @load_extract_float_idx0(ptr %p) {
+; ANY-LABEL: @load_extract_float_idx0(
+; ANY-NEXT: [[V_ELT:%.*]] = load float, ptr [[P:%.*]], align 16
+; ANY-NEXT: ret float [[V_ELT]]
+;
+ %v = load <4 x float>, ptr %p, align 16
+ %e = extractelement <4 x float> %v, i32 0
+ ret float %e
+}
+
+define float @load_extract_float_idx1_align4(ptr %p) {
+; ANY-LABEL: @load_extract_float_idx1_align4(
+; ANY-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
+; ANY-NEXT: [[V_ELT:%.*]] = load float, ptr [[TMP1]], align 4
+; ANY-NEXT: ret float [[V_ELT]]
+;
+ %v = load <4 x float>, ptr %p, align 4
+ %e = extractelement <4 x float> %v, i32 1
+ ret float %e
+}
+
+; Negative test: load has multiple uses
+declare void @use_v4i32(<4 x i32>)
+define i32 @load_extract_multiuse(ptr %p) {
+; ANY-LABEL: @load_extract_multiuse(
+; ANY-NEXT: [[V:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
+; ANY-NEXT: call void @use_v4i32(<4 x i32> [[V]])
+; ANY-NEXT: [[E:%.*]] = extractelement <4 x i32> [[V]], i64 1
+; ANY-NEXT: ret i32 [[E]]
+;
+ %v = load <4 x i32>, ptr %p, align 16
+ call void @use_v4i32(<4 x i32> %v)
+ %e = extractelement <4 x i32> %v, i32 1
+ ret i32 %e
+}
+
+; Negative test: volatile load
+define i32 @load_extract_volatile(ptr %p) {
+; ANY-LABEL: @load_extract_volatile(
+; ANY-NEXT: [[V:%.*]] = load volatile <4 x i32>, ptr [[P:%.*]], align 16
+; ANY-NEXT: [[E:%.*]] = extractelement <4 x i32> [[V]], i64 1
+; ANY-NEXT: ret i32 [[E]]
+;
+ %v = load volatile <4 x i32>, ptr %p, align 16
+ %e = extractelement <4 x i32> %v, i32 1
+ ret i32 %e
+}
+
+; Scalable vector
+
+define i32 @load_extract_scalable_idx2(ptr %p) {
+; ANY-LABEL: @load_extract_scalable_idx2(
+; ANY-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 8
+; ANY-NEXT: [[V_ELT:%.*]] = load i32, ptr [[TMP1]], align 8
+; ANY-NEXT: ret i32 [[V_ELT]]
+;
+ %v = load <vscale x 4 x i32>, ptr %p, align 16
+ %e = extractelement <vscale x 4 x i32> %v, i32 2
+ ret i32 %e
+}
+
+define i32 @load_extract_scalable_idx3(ptr %p) {
+; ANY-LABEL: @load_extract_scalable_idx3(
+; ANY-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 12
+; ANY-NEXT: [[V_ELT:%.*]] = load i32, ptr [[TMP1]], align 4
+; ANY-NEXT: ret i32 [[V_ELT]]
+;
+ %v = load <vscale x 4 x i32>, ptr %p, align 16
+ %e = extractelement <vscale x 4 x i32> %v, i32 3
+ ret i32 %e
+}
+
+; Negative test: index not proven inbounds
+define i32 @load_extract_scalable_idx4_oob(ptr %p) {
+; ANY-LABEL: @load_extract_scalable_idx4_oob(
+; ANY-NEXT: [[V:%.*]] = load <vscale x 4 x i32>, ptr [[P:%.*]], align 16
+; ANY-NEXT: [[E:%.*]] = extractelement <vscale x 4 x i32> [[V]], i64 4
+; ANY-NEXT: ret i32 [[E]]
+;
+ %v = load <vscale x 4 x i32>, ptr %p, align 16
+ %e = extractelement <vscale x 4 x i32> %v, i32 4
+ ret i32 %e
+}
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 625c979cf794d..0384eec6e43dc 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -367,8 +367,8 @@ entry:
define void @scatter_v4i16_no_uniform_vals_uniform_ptrs_all_active_mask(ptr %dst, ptr %src) {
; CHECK-LABEL: @scatter_v4i16_no_uniform_vals_uniform_ptrs_all_active_mask(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[SRC:%.*]], align 2
-; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC:%.*]], i64 6
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[TMP1]], align 2
; CHECK-NEXT: store i16 [[TMP0]], ptr [[DST:%.*]], align 2
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/InstCombine/scalarization-inseltpoison.ll b/llvm/test/Transforms/InstCombine/scalarization-inseltpoison.ll
index 29c0ac415ce7c..11855df189c0c 100644
--- a/llvm/test/Transforms/InstCombine/scalarization-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization-inseltpoison.ll
@@ -4,8 +4,8 @@
define i32 @extract_load(ptr %p) {
;
; CHECK-LABEL: @extract_load(
-; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: [[EXT:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: ret i32 [[EXT]]
;
%x = load <4 x i32>, ptr %p, align 4
@@ -16,8 +16,8 @@ define i32 @extract_load(ptr %p) {
define double @extract_load_fp(ptr %p) {
;
; CHECK-LABEL: @extract_load_fp(
-; CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 24
+; CHECK-NEXT: [[EXT:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, ptr %p, align 32
@@ -183,10 +183,10 @@ define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
define float @extract_element_load(<4 x float> %x, ptr %ptr) {
;
; CHECK-LABEL: @extract_element_load(
-; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[LOAD]], i64 2
-; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[LOAD:%.*]], i64 2
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT: [[LOAD_ELT:%.*]] = load float, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP2]], [[LOAD_ELT]]
; CHECK-NEXT: ret float [[R]]
;
%load = load <4 x float>, ptr %ptr
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index c4adf756f7756..879613b34cf7b 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -4,8 +4,8 @@
define i32 @extract_load(ptr %p) {
;
; CHECK-LABEL: @extract_load(
-; CHECK-NEXT: [[X:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[X]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT: [[EXT:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: ret i32 [[EXT]]
;
%x = load <4 x i32>, ptr %p, align 4
@@ -16,8 +16,8 @@ define i32 @extract_load(ptr %p) {
define double @extract_load_fp(ptr %p) {
;
; CHECK-LABEL: @extract_load_fp(
-; CHECK-NEXT: [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 3
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 24
+; CHECK-NEXT: [[EXT:%.*]] = load double, ptr [[TMP1]], align 8
; CHECK-NEXT: ret double [[EXT]]
;
%x = load <4 x double>, ptr %p, align 32
@@ -255,10 +255,10 @@ define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
define float @extract_element_load(<4 x float> %x, ptr %ptr) {
;
; CHECK-LABEL: @extract_element_load(
-; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[LOAD]], i64 2
-; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[LOAD:%.*]], i64 2
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT: [[LOAD_ELT:%.*]] = load float, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[R:%.*]] = fadd float [[TMP2]], [[LOAD_ELT]]
; CHECK-NEXT: ret float [[R]]
;
%load = load <4 x float>, ptr %ptr
diff --git a/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll b/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll
index 0e4c3f9205831..1406406e731c0 100644
--- a/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll
+++ b/llvm/test/Transforms/InstCombine/sink-into-catchswitch.ll
@@ -9,18 +9,17 @@ target triple = "x86_64-pc-windows-msvc18.0.0"
define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 {
; CHECK-LABEL: @test1(
; CHECK-NEXT: invoke.cont:
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT: invoke void @throw()
-; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]]
; CHECK: catch.dispatch:
-; CHECK-NEXT: [[CS:%.*]] = catchswitch within none [label %invoke.cont1] unwind label [[EHCLEANUP:%.*]]
+; CHECK-NEXT: [[CS:%.*]] = catchswitch within none [label [[INVOKE_CONT1:%.*]]] unwind label [[EHCLEANUP:%.*]]
; CHECK: invoke.cont1:
; CHECK-NEXT: [[CATCH:%.*]] = catchpad within [[CS]] [ptr null, i32 64, ptr null]
; CHECK-NEXT: invoke void @throw() [ "funclet"(token [[CATCH]]) ]
-; CHECK-NEXT: to label [[UNREACHABLE]] unwind label [[EHCLEANUP]]
+; CHECK-NEXT: to label [[UNREACHABLE]] unwind label [[EHCLEANUP]]
; CHECK: ehcleanup:
-; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], [[CATCH_DISPATCH]] ], [ 9, [[INVOKE_CONT1:%.*]] ]
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], [[CATCH_DISPATCH]] ], [ 9, [[INVOKE_CONT1]] ]
; CHECK-NEXT: [[CLEANUP:%.*]] = cleanuppad within none []
; CHECK-NEXT: call void @release(i64 [[PHI]]) [ "funclet"(token [[CLEANUP]]) ]
; CHECK-NEXT: cleanupret from [[CLEANUP]] unwind to caller
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
index aa40a2df06817..76191992ab083 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-dominance.ll
@@ -23,45 +23,53 @@ define void @multiply_can_hoist_cast(ptr noalias %A, ptr %B, ptr %C) {
; CHECK: no_alias:
; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ]
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <1 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <1 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD1_ELT:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD1_ELT]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fmul contract <1 x double> [[COL_LOAD]], [[COL_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i64 16
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <1 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
-; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <1 x double>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[COL_LOAD3_ELT:%.*]] = load double, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[COL_LOAD3:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD3_ELT]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD2]], <1 x double> [[COL_LOAD3]], <1 x double> [[TMP4]])
; CHECK-NEXT: store <1 x double> [[TMP7]], ptr [[C]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <1 x double>, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <1 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD9_ELT:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD9:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD9_ELT]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = fmul contract <1 x double> [[COL_LOAD8]], [[COL_LOAD9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <1 x double>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
-; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <1 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[COL_LOAD14_ELT:%.*]] = load double, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[COL_LOAD14:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD14_ELT]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD13]], <1 x double> [[COL_LOAD14]], <1 x double> [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[C]], i64 8
; CHECK-NEXT: store <1 x double> [[TMP12]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[COL_LOAD19:%.*]] = load <1 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: [[COL_LOAD20:%.*]] = load <1 x double>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[COL_LOAD20_ELT:%.*]] = load double, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[COL_LOAD20:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD20_ELT]], i64 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul contract <1 x double> [[COL_LOAD19]], [[COL_LOAD20]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 16
; CHECK-NEXT: [[COL_LOAD24:%.*]] = load <1 x double>, ptr [[TMP16]], align 8
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
-; CHECK-NEXT: [[COL_LOAD25:%.*]] = load <1 x double>, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[COL_LOAD25_ELT:%.*]] = load double, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[COL_LOAD25:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD25_ELT]], i64 0
; CHECK-NEXT: [[TMP18:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD24]], <1 x double> [[COL_LOAD25]], <1 x double> [[TMP15]])
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[C]], i64 16
; CHECK-NEXT: store <1 x double> [[TMP18]], ptr [[TMP19]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, ptr [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: [[COL_LOAD31:%.*]] = load <1 x double>, ptr [[TMP21]], align 8
+; CHECK-NEXT: [[COL_LOAD31_ELT:%.*]] = load double, ptr [[TMP21]], align 8
+; CHECK-NEXT: [[COL_LOAD31:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD31_ELT]], i64 0
; CHECK-NEXT: [[TMP22:%.*]] = fmul contract <1 x double> [[COL_LOAD30]], [[COL_LOAD31]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[COL_LOAD35:%.*]] = load <1 x double>, ptr [[TMP23]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
-; CHECK-NEXT: [[COL_LOAD36:%.*]] = load <1 x double>, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[COL_LOAD36_ELT:%.*]] = load double, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[COL_LOAD36:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD36_ELT]], i64 0
; CHECK-NEXT: [[TMP25:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD35]], <1 x double> [[COL_LOAD36]], <1 x double> [[TMP22]])
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[C]], i64 24
; CHECK-NEXT: store <1 x double> [[TMP25]], ptr [[TMP26]], align 8
@@ -93,45 +101,53 @@ define void @multiply_can_hoist_multiple_insts(ptr noalias %A, ptr %B, ptr %C) {
; CHECK: no_alias:
; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ]
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <1 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <1 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD1_ELT:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD1_ELT]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fmul contract <1 x double> [[COL_LOAD]], [[COL_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i64 16
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <1 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
-; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <1 x double>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[COL_LOAD3_ELT:%.*]] = load double, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[COL_LOAD3:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD3_ELT]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD2]], <1 x double> [[COL_LOAD3]], <1 x double> [[TMP4]])
; CHECK-NEXT: store <1 x double> [[TMP7]], ptr [[GEP]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <1 x double>, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <1 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD9_ELT:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD9:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD9_ELT]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = fmul contract <1 x double> [[COL_LOAD8]], [[COL_LOAD9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <1 x double>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
-; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <1 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[COL_LOAD14_ELT:%.*]] = load double, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[COL_LOAD14:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD14_ELT]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD13]], <1 x double> [[COL_LOAD14]], <1 x double> [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[C]], i64 72
; CHECK-NEXT: store <1 x double> [[TMP12]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[COL_LOAD19:%.*]] = load <1 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: [[COL_LOAD20:%.*]] = load <1 x double>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[COL_LOAD20_ELT:%.*]] = load double, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[COL_LOAD20:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD20_ELT]], i64 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul contract <1 x double> [[COL_LOAD19]], [[COL_LOAD20]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 16
; CHECK-NEXT: [[COL_LOAD24:%.*]] = load <1 x double>, ptr [[TMP16]], align 8
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
-; CHECK-NEXT: [[COL_LOAD25:%.*]] = load <1 x double>, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[COL_LOAD25_ELT:%.*]] = load double, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[COL_LOAD25:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD25_ELT]], i64 0
; CHECK-NEXT: [[TMP18:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD24]], <1 x double> [[COL_LOAD25]], <1 x double> [[TMP15]])
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[C]], i64 80
; CHECK-NEXT: store <1 x double> [[TMP18]], ptr [[TMP19]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, ptr [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: [[COL_LOAD31:%.*]] = load <1 x double>, ptr [[TMP21]], align 8
+; CHECK-NEXT: [[COL_LOAD31_ELT:%.*]] = load double, ptr [[TMP21]], align 8
+; CHECK-NEXT: [[COL_LOAD31:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD31_ELT]], i64 0
; CHECK-NEXT: [[TMP22:%.*]] = fmul contract <1 x double> [[COL_LOAD30]], [[COL_LOAD31]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[COL_LOAD35:%.*]] = load <1 x double>, ptr [[TMP23]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
-; CHECK-NEXT: [[COL_LOAD36:%.*]] = load <1 x double>, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[COL_LOAD36_ELT:%.*]] = load double, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[COL_LOAD36:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD36_ELT]], i64 0
; CHECK-NEXT: [[TMP25:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD35]], <1 x double> [[COL_LOAD36]], <1 x double> [[TMP22]])
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[C]], i64 88
; CHECK-NEXT: store <1 x double> [[TMP25]], ptr [[TMP26]], align 8
@@ -165,45 +181,53 @@ define void @multiply_can_hoist_multiple_insts2(ptr noalias %A, ptr %B, ptr %C)
; CHECK: no_alias:
; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ]
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <1 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <1 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD1_ELT:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD1:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD1_ELT]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fmul contract <1 x double> [[COL_LOAD]], [[COL_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i64 16
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <1 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
-; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <1 x double>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[COL_LOAD3_ELT:%.*]] = load double, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[COL_LOAD3:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD3_ELT]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD2]], <1 x double> [[COL_LOAD3]], <1 x double> [[TMP4]])
; CHECK-NEXT: store <1 x double> [[TMP7]], ptr [[GEP_1]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <1 x double>, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <1 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD9_ELT:%.*]] = load double, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[COL_LOAD9:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD9_ELT]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = fmul contract <1 x double> [[COL_LOAD8]], [[COL_LOAD9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <1 x double>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
-; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <1 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[COL_LOAD14_ELT:%.*]] = load double, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[COL_LOAD14:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD14_ELT]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD13]], <1 x double> [[COL_LOAD14]], <1 x double> [[TMP9]])
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[C]], i64 1352
; CHECK-NEXT: store <1 x double> [[TMP12]], ptr [[TMP13]], align 8
; CHECK-NEXT: [[COL_LOAD19:%.*]] = load <1 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: [[COL_LOAD20:%.*]] = load <1 x double>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[COL_LOAD20_ELT:%.*]] = load double, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[COL_LOAD20:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD20_ELT]], i64 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul contract <1 x double> [[COL_LOAD19]], [[COL_LOAD20]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 16
; CHECK-NEXT: [[COL_LOAD24:%.*]] = load <1 x double>, ptr [[TMP16]], align 8
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
-; CHECK-NEXT: [[COL_LOAD25:%.*]] = load <1 x double>, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[COL_LOAD25_ELT:%.*]] = load double, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[COL_LOAD25:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD25_ELT]], i64 0
; CHECK-NEXT: [[TMP18:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD24]], <1 x double> [[COL_LOAD25]], <1 x double> [[TMP15]])
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[C]], i64 1360
; CHECK-NEXT: store <1 x double> [[TMP18]], ptr [[TMP19]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[COL_LOAD30:%.*]] = load <1 x double>, ptr [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
-; CHECK-NEXT: [[COL_LOAD31:%.*]] = load <1 x double>, ptr [[TMP21]], align 8
+; CHECK-NEXT: [[COL_LOAD31_ELT:%.*]] = load double, ptr [[TMP21]], align 8
+; CHECK-NEXT: [[COL_LOAD31:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD31_ELT]], i64 0
; CHECK-NEXT: [[TMP22:%.*]] = fmul contract <1 x double> [[COL_LOAD30]], [[COL_LOAD31]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[A]], i64 24
; CHECK-NEXT: [[COL_LOAD35:%.*]] = load <1 x double>, ptr [[TMP23]], align 8
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
-; CHECK-NEXT: [[COL_LOAD36:%.*]] = load <1 x double>, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[COL_LOAD36_ELT:%.*]] = load double, ptr [[TMP24]], align 8
+; CHECK-NEXT: [[COL_LOAD36:%.*]] = insertelement <1 x double> poison, double [[COL_LOAD36_ELT]], i64 0
; CHECK-NEXT: [[TMP25:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[COL_LOAD35]], <1 x double> [[COL_LOAD36]], <1 x double> [[TMP22]])
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[C]], i64 1368
; CHECK-NEXT: store <1 x double> [[TMP25]], ptr [[TMP26]], align 8
More information about the llvm-commits
mailing list