[llvm] 4dda564 - [RISCV][SLP] Add test coverage for 2^N-1 vector sizes
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 10:25:54 PDT 2024
Author: Philip Reames
Date: 2024-08-27T10:24:46-07:00
New Revision: 4dda564c725cb84c09ee87f0ccfe5ffd295100b0
URL: https://github.com/llvm/llvm-project/commit/4dda564c725cb84c09ee87f0ccfe5ffd295100b0
DIFF: https://github.com/llvm/llvm-project/commit/4dda564c725cb84c09ee87f0ccfe5ffd295100b0.diff
LOG: [RISCV][SLP] Add test coverage for 2^N-1 vector sizes
Mostly copied from the AArch64 coverage for same, but also added
a couple tests for reductions which aren't currently supported.
Added:
llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll
llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll
new file mode 100644
index 00000000000000..3a1a8fb4b2e32f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=NON-POW2 %s
+; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=POW2-ONLY %s
+; Fifteen adjacent i8 lanes (load, mul nsw by 10, store to noalias %dst): expect one <15 x i8> chain with non-power-of-2 SLP, else an 8+4+2 vector split plus a scalar lane 14.
+define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
+; NON-POW2-LABEL: define void @v15_load_i8_mul_by_constant_store(
+; NON-POW2-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
+; NON-POW2-NEXT: [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4
+; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; NON-POW2-NEXT: store <15 x i8> [[TMP1]], ptr [[DST]], align 1
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: define void @v15_load_i8_mul_by_constant_store(
+; POW2-ONLY-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
+; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
+; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; POW2-ONLY-NEXT: store <8 x i8> [[TMP1]], ptr [[DST]], align 1
+; POW2-ONLY-NEXT: [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
+; POW2-ONLY-NEXT: [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
+; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
+; POW2-ONLY-NEXT: [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
+; POW2-ONLY-NEXT: store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
+; POW2-ONLY-NEXT: [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
+; POW2-ONLY-NEXT: [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
+; POW2-ONLY-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_SRC_12]], align 4
+; POW2-ONLY-NEXT: [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], <i8 10, i8 10>
+; POW2-ONLY-NEXT: store <2 x i8> [[TMP5]], ptr [[DST_12]], align 1
+; POW2-ONLY-NEXT: [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
+; POW2-ONLY-NEXT: [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
+; POW2-ONLY-NEXT: [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
+; POW2-ONLY-NEXT: [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
+; POW2-ONLY-NEXT: store i8 [[MUL_14]], ptr [[DST_14]], align 1
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %gep.src.0 = getelementptr inbounds i8, ptr %src, i8 0
+ %l.src.0 = load i8, ptr %gep.src.0, align 4
+ %mul.0 = mul nsw i8 %l.src.0, 10
+ store i8 %mul.0, ptr %dst
+
+ %gep.src.1 = getelementptr inbounds i8, ptr %src, i8 1
+ %l.src.1 = load i8, ptr %gep.src.1, align 4
+ %mul.1 = mul nsw i8 %l.src.1, 10
+ %dst.1 = getelementptr i8, ptr %dst, i8 1
+ store i8 %mul.1, ptr %dst.1
+
+ %gep.src.2 = getelementptr inbounds i8, ptr %src, i8 2
+ %l.src.2 = load i8, ptr %gep.src.2, align 4
+ %mul.2 = mul nsw i8 %l.src.2, 10
+ %dst.2 = getelementptr i8, ptr %dst, i8 2
+ store i8 %mul.2, ptr %dst.2
+
+ %gep.src.3 = getelementptr inbounds i8, ptr %src, i8 3
+ %l.src.3 = load i8, ptr %gep.src.3, align 4
+ %mul.3 = mul nsw i8 %l.src.3, 10
+ %dst.3 = getelementptr i8, ptr %dst, i8 3
+ store i8 %mul.3, ptr %dst.3
+
+ %gep.src.4 = getelementptr inbounds i8, ptr %src, i8 4
+ %l.src.4 = load i8, ptr %gep.src.4, align 4
+ %mul.4 = mul nsw i8 %l.src.4, 10
+ %dst.4 = getelementptr i8, ptr %dst, i8 4
+ store i8 %mul.4, ptr %dst.4
+
+ %gep.src.5 = getelementptr inbounds i8, ptr %src, i8 5
+ %l.src.5 = load i8, ptr %gep.src.5, align 4
+ %mul.5 = mul nsw i8 %l.src.5, 10
+ %dst.5 = getelementptr i8, ptr %dst, i8 5
+ store i8 %mul.5, ptr %dst.5
+
+ %gep.src.6 = getelementptr inbounds i8, ptr %src, i8 6
+ %l.src.6 = load i8, ptr %gep.src.6, align 4
+ %mul.6 = mul nsw i8 %l.src.6, 10
+ %dst.6 = getelementptr i8, ptr %dst, i8 6
+ store i8 %mul.6, ptr %dst.6
+
+ %gep.src.7 = getelementptr inbounds i8, ptr %src, i8 7
+ %l.src.7 = load i8, ptr %gep.src.7, align 4
+ %mul.7 = mul nsw i8 %l.src.7, 10
+ %dst.7 = getelementptr i8, ptr %dst, i8 7
+ store i8 %mul.7, ptr %dst.7
+
+ %gep.src.8 = getelementptr inbounds i8, ptr %src, i8 8
+ %l.src.8 = load i8, ptr %gep.src.8, align 4
+ %mul.8 = mul nsw i8 %l.src.8, 10
+ %dst.8 = getelementptr i8, ptr %dst, i8 8
+ store i8 %mul.8, ptr %dst.8
+
+ %gep.src.9 = getelementptr inbounds i8, ptr %src, i8 9
+ %l.src.9 = load i8, ptr %gep.src.9, align 4
+ %mul.9 = mul nsw i8 %l.src.9, 10
+ %dst.9 = getelementptr i8, ptr %dst, i8 9
+ store i8 %mul.9, ptr %dst.9
+
+ %gep.src.10 = getelementptr inbounds i8, ptr %src, i8 10
+ %l.src.10 = load i8, ptr %gep.src.10, align 4
+ %mul.10 = mul nsw i8 %l.src.10, 10
+ %dst.10 = getelementptr i8, ptr %dst, i8 10
+ store i8 %mul.10, ptr %dst.10
+
+ %gep.src.11 = getelementptr inbounds i8, ptr %src, i8 11
+ %l.src.11 = load i8, ptr %gep.src.11, align 4
+ %mul.11 = mul nsw i8 %l.src.11, 10
+ %dst.11 = getelementptr i8, ptr %dst, i8 11
+ store i8 %mul.11, ptr %dst.11
+
+ %gep.src.12 = getelementptr inbounds i8, ptr %src, i8 12
+ %l.src.12 = load i8, ptr %gep.src.12, align 4
+ %mul.12 = mul nsw i8 %l.src.12, 10
+ %dst.12 = getelementptr i8, ptr %dst, i8 12
+ store i8 %mul.12, ptr %dst.12
+
+ %gep.src.13 = getelementptr inbounds i8, ptr %src, i8 13
+ %l.src.13 = load i8, ptr %gep.src.13, align 4
+ %mul.13 = mul nsw i8 %l.src.13, 10
+ %dst.13 = getelementptr i8, ptr %dst, i8 13
+ store i8 %mul.13, ptr %dst.13
+
+ %gep.src.14 = getelementptr inbounds i8, ptr %src, i8 14
+ %l.src.14 = load i8, ptr %gep.src.14, align 4
+ %mul.14 = mul nsw i8 %l.src.14, 10
+ %dst.14 = getelementptr i8, ptr %dst, i8 14
+ store i8 %mul.14, ptr %dst.14
+
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
new file mode 100644
index 00000000000000..9cf41d9ad1fe60
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -0,0 +1,514 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
+; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
+; Three adjacent i32 lanes scaled by 10, all loads issued before the stores: expect <3 x i32> with non-power-of-2 SLP, else <2 x i32> plus a scalar lane 2.
+define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
+; NON-POW2-LABEL: @v3_load_i32_mul_by_constant_store(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
+; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
+; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @v3_load_i32_mul_by_constant_store(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
+; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
+; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
+; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4
+; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 10, i32 10>
+; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
+; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
+ %l.src.0 = load i32, ptr %gep.src.0, align 4
+ %mul.0 = mul nsw i32 %l.src.0, 10
+
+ %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
+ %l.src.1 = load i32, ptr %gep.src.1, align 4
+ %mul.1 = mul nsw i32 %l.src.1, 10
+
+ %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
+ %l.src.2 = load i32, ptr %gep.src.2, align 4
+ %mul.2 = mul nsw i32 %l.src.2, 10
+
+ store i32 %mul.0, ptr %dst
+
+ %dst.1 = getelementptr i32, ptr %dst, i32 1
+ store i32 %mul.1, ptr %dst.1
+
+ %dst.2 = getelementptr i32, ptr %dst, i32 2
+ store i32 %mul.2, ptr %dst.2
+
+ ret void
+}
+
+; udiv (10 / lane) must not be vectorized with an undef/poison element as padding,
+; as division by undef/poison may cause UB. An exact 3-element vector (VL predication
+; or masking, where RISCV wins) needs no padding, so only non-power-of-2 SLP widens it.
+define void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) {
+; NON-POW2-LABEL: @v3_load_i32_udiv_by_constant_store(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
+; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
+; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> <i32 10, i32 10, i32 10>, [[TMP0]]
+; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @v3_load_i32_udiv_by_constant_store(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
+; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
+; POW2-ONLY-NEXT: [[UDIV_0:%.*]] = udiv i32 10, [[L_SRC_0]]
+; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
+; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
+; POW2-ONLY-NEXT: [[UDIV_1:%.*]] = udiv i32 10, [[L_SRC_1]]
+; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
+; POW2-ONLY-NEXT: [[UDIV_2:%.*]] = udiv i32 10, [[L_SRC_2]]
+; POW2-ONLY-NEXT: store i32 [[UDIV_0]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
+; POW2-ONLY-NEXT: store i32 [[UDIV_1]], ptr [[DST_1]], align 4
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
+; POW2-ONLY-NEXT: store i32 [[UDIV_2]], ptr [[DST_2]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
+ %l.src.0 = load i32, ptr %gep.src.0, align 4
+ %udiv.0 = udiv i32 10, %l.src.0
+
+ %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
+ %l.src.1 = load i32, ptr %gep.src.1, align 4
+ %udiv.1 = udiv i32 10, %l.src.1
+
+ %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
+ %l.src.2 = load i32, ptr %gep.src.2, align 4
+ %udiv.2 = udiv i32 10, %l.src.2
+
+ store i32 %udiv.0, ptr %dst
+
+ %dst.1 = getelementptr i32, ptr %dst, i32 1
+ store i32 %udiv.1, ptr %dst.1
+
+ %dst.2 = getelementptr i32, ptr %dst, i32 2
+ store i32 %udiv.2, ptr %dst.2
+
+ ret void
+}
+
+
+; Three adjacent i32 lanes, each the nsw product of a load from %src.1 and a load from %src.2: expect <3 x i32>, else <2 x i32> plus a scalar lane 2.
+define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
+; NON-POW2-LABEL: @v3_load_i32_mul_store(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
+; NON-POW2-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
+; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
+; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
+; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
+; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @v3_load_i32_mul_store(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
+; POW2-ONLY-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
+; POW2-ONLY-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
+; POW2-ONLY-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
+; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
+; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
+; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
+; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
+; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
+; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
+ %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
+ %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
+ %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
+ %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
+
+ %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
+ %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
+ %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
+ %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
+ %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
+
+ %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
+ %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
+ %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
+ %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
+ %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
+
+ store i32 %mul.0, ptr %dst
+
+ %dst.1 = getelementptr i32, ptr %dst, i32 1
+ store i32 %mul.1, ptr %dst.1
+
+ %dst.2 = getelementptr i32, ptr %dst, i32 2
+ store i32 %mul.2, ptr %dst.2
+
+ ret void
+}
+; Three adjacent i32 lanes computing (a[i] * b[i]) + 9: the mul and add are expected to widen together to <3 x i32>, else <2 x i32> plus a scalar lane 2.
+define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
+; NON-POW2-LABEL: @v3_load_i32_mul_add_const_store(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
+; NON-POW2-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
+; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
+; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
+; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
+; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], <i32 9, i32 9, i32 9>
+; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @v3_load_i32_mul_add_const_store(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
+; POW2-ONLY-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
+; POW2-ONLY-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
+; POW2-ONLY-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
+; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
+; POW2-ONLY-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
+; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
+; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
+; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
+; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 9, i32 9>
+; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
+; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
+ %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
+ %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
+ %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
+ %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
+ %add.0 = add i32 %mul.0, 9
+
+ %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
+ %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
+ %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
+ %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
+ %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
+ %add.1 = add i32 %mul.1, 9
+
+ %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
+ %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
+ %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
+ %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
+ %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
+ %add.2 = add i32 %mul.2, 9
+
+ store i32 %add.0, ptr %dst
+
+ %dst.1 = getelementptr i32, ptr %dst, i32 1
+ store i32 %add.1, ptr %dst.1
+
+ %dst.2 = getelementptr i32, ptr %dst, i32 2
+ store i32 %add.2, ptr %dst.2
+
+ ret void
+}
+; Three adjacent float lanes, each a loaded value plus 10.0: expect <3 x float> with non-power-of-2 SLP, else <2 x float> plus a scalar lane 2.
+define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
+; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
+; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
+; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
+; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
+; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
+; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
+; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
+; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
+; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
+; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
+; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
+ %l.src.0 = load float , ptr %gep.src.0, align 4
+ %fadd.0 = fadd float %l.src.0, 10.0
+
+ %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1
+ %l.src.1 = load float, ptr %gep.src.1, align 4
+ %fadd.1 = fadd float %l.src.1, 10.0
+
+ %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
+ %l.src.2 = load float, ptr %gep.src.2, align 4
+ %fadd.2 = fadd float %l.src.2, 10.0
+
+ store float %fadd.0, ptr %dst
+
+ %dst.1 = getelementptr float, ptr %dst, i32 1
+ store float %fadd.1, ptr %dst.1
+
+ %dst.2 = getelementptr float, ptr %dst, i32 2
+ store float %fadd.2, ptr %dst.2
+
+ ret void
+}
+; Three i32 phis stored to adjacent slots; the second incoming block has no predecessors. Expect one <3 x i32> phi, else a <2 x i32> phi plus the scalar %p.2.
+define void @phi_store3(ptr %dst) {
+; NON-POW2-LABEL: @phi_store3(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: br label [[EXIT:%.*]]
+; NON-POW2: invoke.cont8.loopexit:
+; NON-POW2-NEXT: br label [[EXIT]]
+; NON-POW2: exit:
+; NON-POW2-NEXT: [[TMP0:%.*]] = phi <3 x i32> [ <i32 1, i32 2, i32 3>, [[ENTRY:%.*]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
+; NON-POW2-NEXT: store <3 x i32> [[TMP0]], ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @phi_store3(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: br label [[EXIT:%.*]]
+; POW2-ONLY: invoke.cont8.loopexit:
+; POW2-ONLY-NEXT: br label [[EXIT]]
+; POW2-ONLY: exit:
+; POW2-ONLY-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
+; POW2-ONLY-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
+; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
+; POW2-ONLY-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4
+; POW2-ONLY-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ br label %exit
+
+invoke.cont8.loopexit: ; No predecessors!
+ br label %exit
+
+exit:
+ %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
+ %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
+ %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]
+
+ %dst.1 = getelementptr i32, ptr %dst, i32 1
+ %dst.2 = getelementptr i32, ptr %dst, i32 2
+
+ store i32 %p.0, ptr %dst, align 4
+ store i32 %p.1, ptr %dst.1, align 4
+ store i32 %p.2, ptr %dst.2, align 4
+ ret void
+}
+; Three adjacent i32 stores of constant-zero expressions (one add, two subs): expect a single zeroinitializer <3 x i32> store, else a scalar store plus a <2 x i32> store at index 1.
+define void @store_try_reorder(ptr %dst) {
+; NON-POW2-LABEL: @store_try_reorder(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @store_try_reorder(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
+; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %add = add i32 0, 0
+ store i32 %add, ptr %dst, align 4
+ %add207 = sub i32 0, 0
+ %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
+ store i32 %add207, ptr %arrayidx.i1887, align 4
+ %add216 = sub i32 0, 0
+ %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
+ store i32 %add216, ptr %arrayidx.i1891, align 4
+ ret void
+}
+; The same fpext -> fmuladd(x, 0.0, 0.0) -> fptrunc chain on %0 is stored to three adjacent floats: expect 3-element vector ops, else 2-element ops plus a scalar chain for index 2.
+define void @vec3_fpext_cost(ptr %Colour, float %0) {
+; NON-POW2-LABEL: @vec3_fpext_cost(
+; NON-POW2-NEXT: entry:
+; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 0
+; NON-POW2-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> zeroinitializer
+; NON-POW2-NEXT: [[TMP3:%.*]] = fpext <3 x float> [[TMP2]] to <3 x double>
+; NON-POW2-NEXT: [[TMP4:%.*]] = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> [[TMP3]], <3 x double> zeroinitializer, <3 x double> zeroinitializer)
+; NON-POW2-NEXT: [[TMP5:%.*]] = fptrunc <3 x double> [[TMP4]] to <3 x float>
+; NON-POW2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR:%.*]], align 4
+; NON-POW2-NEXT: ret void
+;
+; POW2-ONLY-LABEL: @vec3_fpext_cost(
+; POW2-ONLY-NEXT: entry:
+; POW2-ONLY-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
+; POW2-ONLY-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
+; POW2-ONLY-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
+; POW2-ONLY-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
+; POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
+; POW2-ONLY-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
+; POW2-ONLY-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
+; POW2-ONLY-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double
+; POW2-ONLY-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
+; POW2-ONLY-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
+; POW2-ONLY-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
+; POW2-ONLY-NEXT: ret void
+;
+entry:
+ %arrayidx72 = getelementptr float, ptr %Colour, i64 1
+ %arrayidx80 = getelementptr float, ptr %Colour, i64 2
+ %conv62 = fpext float %0 to double
+ %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
+ %conv66 = fptrunc double %1 to float
+ store float %conv66, ptr %Colour, align 4
+ %conv70 = fpext float %0 to double
+ %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
+ %conv74 = fptrunc double %2 to float
+ store float %conv74, ptr %arrayidx72, align 4
+ %conv78 = fpext float %0 to double
+ %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
+ %conv82 = fptrunc double %3 to float
+ store float %conv82, ptr %arrayidx80, align 4
+ ret void
+}
+; One fptrunc result is stored to three adjacent float slots; both runs share expectations and the IR is expected to stay scalar.
+define void @fpext_scatter(ptr %dst, double %conv) {
+; CHECK-LABEL: @fpext_scatter(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV25:%.*]] = fptrunc double [[CONV:%.*]] to float
+; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
+; CHECK-NEXT: store float [[CONV25]], ptr [[LENGTHS]], align 4
+; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
+; CHECK-NEXT: store float [[CONV25]], ptr [[ARRAYIDX32]], align 4
+; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr float, ptr [[DST]], i64 2
+; CHECK-NEXT: store float [[CONV25]], ptr [[ARRAYIDX37]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %conv25 = fptrunc double %conv to float
+ %Lengths = getelementptr float, ptr %dst, i64 0
+ store float %conv25, ptr %Lengths, align 4
+ %arrayidx32 = getelementptr float, ptr %dst, i64 1
+ store float %conv25, ptr %arrayidx32, align 4
+ %arrayidx37 = getelementptr float, ptr %dst, i64 2
+ store float %conv25, ptr %arrayidx37, align 4
+ ret void
+}
+; Scalar add reduction over three adjacent i32 loads; both runs share expectations and no vector reduction is expected.
+define i32 @reduce_add(ptr %src) {
+; CHECK-LABEL: @reduce_add(
+; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
+; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
+; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
+; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
+; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[L_SRC_0]], [[L_SRC_1]]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[L_SRC_2]]
+; CHECK-NEXT: ret i32 [[ADD_1]]
+;
+ %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
+ %l.src.0 = load i32, ptr %gep.src.0, align 4
+ %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
+ %l.src.1 = load i32, ptr %gep.src.1, align 4
+ %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
+ %l.src.2 = load i32, ptr %gep.src.2, align 4
+
+ %add.0 = add i32 %l.src.0, %l.src.1
+ %add.1 = add i32 %add.0, %l.src.2
+ ret i32 %add.1
+}
+
+; Add reduction of three adjacent i32 loads, each scaled by 10 first; both runs share expectations and the IR is expected to stay scalar.
+define i32 @reduce_add_after_mul(ptr %src) {
+; CHECK-LABEL: @reduce_add_after_mul(
+; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
+; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
+; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
+; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
+; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
+; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
+; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
+; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
+; CHECK-NEXT: ret i32 [[ADD_1]]
+;
+ %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
+ %l.src.0 = load i32, ptr %gep.src.0, align 4
+ %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
+ %l.src.1 = load i32, ptr %gep.src.1, align 4
+ %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
+ %l.src.2 = load i32, ptr %gep.src.2, align 4
+
+ %mul.0 = mul nsw i32 %l.src.0, 10
+ %mul.1 = mul nsw i32 %l.src.1, 10
+ %mul.2 = mul nsw i32 %l.src.2, 10
+
+ %add.0 = add i32 %mul.0, %mul.1
+ %add.1 = add i32 %add.0, %mul.2
+ ret i32 %add.1
+}
+; Three-element i32 dot product (lane-wise nsw mul of %a and %b, then an add reduction); both runs share expectations and the IR is expected to stay scalar.
+define i32 @dot_product(ptr %a, ptr %b) {
+; CHECK-LABEL: @dot_product(
+; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
+; CHECK-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
+; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
+; CHECK-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
+; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
+; CHECK-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
+; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
+; CHECK-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
+; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
+; CHECK-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
+; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
+; CHECK-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
+; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
+; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
+; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
+; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
+; CHECK-NEXT: ret i32 [[ADD_1]]
+;
+ %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
+ %l.a.0 = load i32, ptr %gep.a.0, align 4
+ %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
+ %l.a.1 = load i32, ptr %gep.a.1, align 4
+ %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
+ %l.a.2 = load i32, ptr %gep.a.2, align 4
+
+ %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
+ %l.b.0 = load i32, ptr %gep.b.0, align 4
+ %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
+ %l.b.1 = load i32, ptr %gep.b.1, align 4
+ %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
+ %l.b.2 = load i32, ptr %gep.b.2, align 4
+
+ %mul.0 = mul nsw i32 %l.a.0, %l.b.0
+ %mul.1 = mul nsw i32 %l.a.1, %l.b.1
+ %mul.2 = mul nsw i32 %l.a.2, %l.b.2
+
+ %add.0 = add i32 %mul.0, %mul.1
+ %add.1 = add i32 %add.0, %mul.2
+ ret i32 %add.1
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+declare double @llvm.fmuladd.f64(double, double, double)
More information about the llvm-commits
mailing list