[llvm] 8bef17e - [AArch64][SLP] Add a test with mutual reductions. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 9 13:47:01 PST 2022


Author: David Green
Date: 2022-03-09T21:46:57Z
New Revision: 8bef17ed59aafc1b9e645dd956dad124ec7dfb57

URL: https://github.com/llvm/llvm-project/commit/8bef17ed59aafc1b9e645dd956dad124ec7dfb57
DIFF: https://github.com/llvm/llvm-project/commit/8bef17ed59aafc1b9e645dd956dad124ec7dfb57.diff

LOG: [AArch64][SLP] Add a test with mutual reductions. NFC

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
new file mode 100644
index 0000000000000..d868e876e611f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
@@ -0,0 +1,1026 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basic-aa -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-none-eabi"
+
+; This test has mutual reductions, referencing the same data:
+; for i = ...
+;   sm += x[i];
+;   sq += x[i] * x[i];
+; It currently doesn't SLP vectorize, but should.
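+;
+; As a rough sketch (illustrative only, not checked by the tests below), an
+; SLP-vectorized form of one 8-element block could look something like the
+; following, assuming the scalar loads are combined into a single <8 x i16>
+; load (the %vp, %v16, %v, %sqv names are hypothetical):
+;   %v16 = load <8 x i16>, <8 x i16>* %vp, align 2
+;   %v   = zext <8 x i16> %v16 to <8 x i32>
+;   %sqv = mul nuw nsw <8 x i32> %v, %v
+;   %sm  = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v)
+;   %sq  = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %sqv)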
+
+define i64 @straight(i16* nocapture noundef readonly %p, i32 noundef %st) {
+; CHECK-LABEL: @straight(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[P:%.*]], align 2
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
+; CHECK-NEXT:    [[CONV_1:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[CONV]], [[CONV_1]]
+; CHECK-NEXT:    [[MUL_1:%.*]] = mul nuw nsw i32 [[CONV_1]], [[CONV_1]]
+; CHECK-NEXT:    [[ADD11_1:%.*]] = add nuw i32 [[MUL_1]], [[MUL]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
+; CHECK-NEXT:    [[CONV_2:%.*]] = zext i16 [[TMP2]] to i32
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], [[CONV_2]]
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul nuw nsw i32 [[CONV_2]], [[CONV_2]]
+; CHECK-NEXT:    [[ADD11_2:%.*]] = add i32 [[MUL_2]], [[ADD11_1]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
+; CHECK-NEXT:    [[CONV_3:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], [[CONV_3]]
+; CHECK-NEXT:    [[MUL_3:%.*]] = mul nuw nsw i32 [[CONV_3]], [[CONV_3]]
+; CHECK-NEXT:    [[ADD11_3:%.*]] = add i32 [[MUL_3]], [[ADD11_2]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_4]], align 2
+; CHECK-NEXT:    [[CONV_4:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], [[CONV_4]]
+; CHECK-NEXT:    [[MUL_4:%.*]] = mul nuw nsw i32 [[CONV_4]], [[CONV_4]]
+; CHECK-NEXT:    [[ADD11_4:%.*]] = add i32 [[MUL_4]], [[ADD11_3]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX_5]], align 2
+; CHECK-NEXT:    [[CONV_5:%.*]] = zext i16 [[TMP5]] to i32
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], [[CONV_5]]
+; CHECK-NEXT:    [[MUL_5:%.*]] = mul nuw nsw i32 [[CONV_5]], [[CONV_5]]
+; CHECK-NEXT:    [[ADD11_5:%.*]] = add i32 [[MUL_5]], [[ADD11_4]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_6]], align 2
+; CHECK-NEXT:    [[CONV_6:%.*]] = zext i16 [[TMP6]] to i32
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i32 [[ADD_5]], [[CONV_6]]
+; CHECK-NEXT:    [[MUL_6:%.*]] = mul nuw nsw i32 [[CONV_6]], [[CONV_6]]
+; CHECK-NEXT:    [[ADD11_6:%.*]] = add i32 [[MUL_6]], [[ADD11_5]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_7]], align 2
+; CHECK-NEXT:    [[CONV_7:%.*]] = zext i16 [[TMP7]] to i32
+; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i32 [[ADD_6]], [[CONV_7]]
+; CHECK-NEXT:    [[MUL_7:%.*]] = mul nuw nsw i32 [[CONV_7]], [[CONV_7]]
+; CHECK-NEXT:    [[ADD11_7:%.*]] = add i32 [[MUL_7]], [[ADD11_6]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ADD_PTR]], align 2
+; CHECK-NEXT:    [[CONV_140:%.*]] = zext i16 [[TMP8]] to i32
+; CHECK-NEXT:    [[ADD_141:%.*]] = add nuw nsw i32 [[ADD_7]], [[CONV_140]]
+; CHECK-NEXT:    [[MUL_142:%.*]] = mul nuw nsw i32 [[CONV_140]], [[CONV_140]]
+; CHECK-NEXT:    [[ADD11_143:%.*]] = add i32 [[MUL_142]], [[ADD11_7]]
+; CHECK-NEXT:    [[ARRAYIDX_1_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_1_1]], align 2
+; CHECK-NEXT:    [[CONV_1_1:%.*]] = zext i16 [[TMP9]] to i32
+; CHECK-NEXT:    [[ADD_1_1:%.*]] = add nuw nsw i32 [[ADD_141]], [[CONV_1_1]]
+; CHECK-NEXT:    [[MUL_1_1:%.*]] = mul nuw nsw i32 [[CONV_1_1]], [[CONV_1_1]]
+; CHECK-NEXT:    [[ADD11_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD11_143]]
+; CHECK-NEXT:    [[ARRAYIDX_2_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_2_1]], align 2
+; CHECK-NEXT:    [[CONV_2_1:%.*]] = zext i16 [[TMP10]] to i32
+; CHECK-NEXT:    [[ADD_2_1:%.*]] = add nuw nsw i32 [[ADD_1_1]], [[CONV_2_1]]
+; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul nuw nsw i32 [[CONV_2_1]], [[CONV_2_1]]
+; CHECK-NEXT:    [[ADD11_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD11_1_1]]
+; CHECK-NEXT:    [[ARRAYIDX_3_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 3
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_3_1]], align 2
+; CHECK-NEXT:    [[CONV_3_1:%.*]] = zext i16 [[TMP11]] to i32
+; CHECK-NEXT:    [[ADD_3_1:%.*]] = add nuw nsw i32 [[ADD_2_1]], [[CONV_3_1]]
+; CHECK-NEXT:    [[MUL_3_1:%.*]] = mul nuw nsw i32 [[CONV_3_1]], [[CONV_3_1]]
+; CHECK-NEXT:    [[ADD11_3_1:%.*]] = add i32 [[MUL_3_1]], [[ADD11_2_1]]
+; CHECK-NEXT:    [[ARRAYIDX_4_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_4_1]], align 2
+; CHECK-NEXT:    [[CONV_4_1:%.*]] = zext i16 [[TMP12]] to i32
+; CHECK-NEXT:    [[ADD_4_1:%.*]] = add nuw nsw i32 [[ADD_3_1]], [[CONV_4_1]]
+; CHECK-NEXT:    [[MUL_4_1:%.*]] = mul nuw nsw i32 [[CONV_4_1]], [[CONV_4_1]]
+; CHECK-NEXT:    [[ADD11_4_1:%.*]] = add i32 [[MUL_4_1]], [[ADD11_3_1]]
+; CHECK-NEXT:    [[ARRAYIDX_5_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 5
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_5_1]], align 2
+; CHECK-NEXT:    [[CONV_5_1:%.*]] = zext i16 [[TMP13]] to i32
+; CHECK-NEXT:    [[ADD_5_1:%.*]] = add nuw nsw i32 [[ADD_4_1]], [[CONV_5_1]]
+; CHECK-NEXT:    [[MUL_5_1:%.*]] = mul nuw nsw i32 [[CONV_5_1]], [[CONV_5_1]]
+; CHECK-NEXT:    [[ADD11_5_1:%.*]] = add i32 [[MUL_5_1]], [[ADD11_4_1]]
+; CHECK-NEXT:    [[ARRAYIDX_6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 6
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX_6_1]], align 2
+; CHECK-NEXT:    [[CONV_6_1:%.*]] = zext i16 [[TMP14]] to i32
+; CHECK-NEXT:    [[ADD_6_1:%.*]] = add nuw nsw i32 [[ADD_5_1]], [[CONV_6_1]]
+; CHECK-NEXT:    [[MUL_6_1:%.*]] = mul nuw nsw i32 [[CONV_6_1]], [[CONV_6_1]]
+; CHECK-NEXT:    [[ADD11_6_1:%.*]] = add i32 [[MUL_6_1]], [[ADD11_5_1]]
+; CHECK-NEXT:    [[ARRAYIDX_7_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 7
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX_7_1]], align 2
+; CHECK-NEXT:    [[CONV_7_1:%.*]] = zext i16 [[TMP15]] to i32
+; CHECK-NEXT:    [[ADD_7_1:%.*]] = add nuw nsw i32 [[ADD_6_1]], [[CONV_7_1]]
+; CHECK-NEXT:    [[MUL_7_1:%.*]] = mul nuw nsw i32 [[CONV_7_1]], [[CONV_7_1]]
+; CHECK-NEXT:    [[ADD11_7_1:%.*]] = add i32 [[MUL_7_1]], [[ADD11_6_1]]
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2
+; CHECK-NEXT:    [[CONV_244:%.*]] = zext i16 [[TMP16]] to i32
+; CHECK-NEXT:    [[ADD_245:%.*]] = add nuw nsw i32 [[ADD_7_1]], [[CONV_244]]
+; CHECK-NEXT:    [[MUL_246:%.*]] = mul nuw nsw i32 [[CONV_244]], [[CONV_244]]
+; CHECK-NEXT:    [[ADD11_247:%.*]] = add i32 [[MUL_246]], [[ADD11_7_1]]
+; CHECK-NEXT:    [[ARRAYIDX_1_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX_1_2]], align 2
+; CHECK-NEXT:    [[CONV_1_2:%.*]] = zext i16 [[TMP17]] to i32
+; CHECK-NEXT:    [[ADD_1_2:%.*]] = add nuw nsw i32 [[ADD_245]], [[CONV_1_2]]
+; CHECK-NEXT:    [[MUL_1_2:%.*]] = mul nuw nsw i32 [[CONV_1_2]], [[CONV_1_2]]
+; CHECK-NEXT:    [[ADD11_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD11_247]]
+; CHECK-NEXT:    [[ARRAYIDX_2_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 2
+; CHECK-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX_2_2]], align 2
+; CHECK-NEXT:    [[CONV_2_2:%.*]] = zext i16 [[TMP18]] to i32
+; CHECK-NEXT:    [[ADD_2_2:%.*]] = add nuw nsw i32 [[ADD_1_2]], [[CONV_2_2]]
+; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul nuw nsw i32 [[CONV_2_2]], [[CONV_2_2]]
+; CHECK-NEXT:    [[ADD11_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD11_1_2]]
+; CHECK-NEXT:    [[ARRAYIDX_3_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 3
+; CHECK-NEXT:    [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX_3_2]], align 2
+; CHECK-NEXT:    [[CONV_3_2:%.*]] = zext i16 [[TMP19]] to i32
+; CHECK-NEXT:    [[ADD_3_2:%.*]] = add nuw nsw i32 [[ADD_2_2]], [[CONV_3_2]]
+; CHECK-NEXT:    [[MUL_3_2:%.*]] = mul nuw nsw i32 [[CONV_3_2]], [[CONV_3_2]]
+; CHECK-NEXT:    [[ADD11_3_2:%.*]] = add i32 [[MUL_3_2]], [[ADD11_2_2]]
+; CHECK-NEXT:    [[ARRAYIDX_4_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 4
+; CHECK-NEXT:    [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX_4_2]], align 2
+; CHECK-NEXT:    [[CONV_4_2:%.*]] = zext i16 [[TMP20]] to i32
+; CHECK-NEXT:    [[ADD_4_2:%.*]] = add nuw nsw i32 [[ADD_3_2]], [[CONV_4_2]]
+; CHECK-NEXT:    [[MUL_4_2:%.*]] = mul nuw nsw i32 [[CONV_4_2]], [[CONV_4_2]]
+; CHECK-NEXT:    [[ADD11_4_2:%.*]] = add i32 [[MUL_4_2]], [[ADD11_3_2]]
+; CHECK-NEXT:    [[ARRAYIDX_5_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 5
+; CHECK-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX_5_2]], align 2
+; CHECK-NEXT:    [[CONV_5_2:%.*]] = zext i16 [[TMP21]] to i32
+; CHECK-NEXT:    [[ADD_5_2:%.*]] = add nuw nsw i32 [[ADD_4_2]], [[CONV_5_2]]
+; CHECK-NEXT:    [[MUL_5_2:%.*]] = mul nuw nsw i32 [[CONV_5_2]], [[CONV_5_2]]
+; CHECK-NEXT:    [[ADD11_5_2:%.*]] = add i32 [[MUL_5_2]], [[ADD11_4_2]]
+; CHECK-NEXT:    [[ARRAYIDX_6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 6
+; CHECK-NEXT:    [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX_6_2]], align 2
+; CHECK-NEXT:    [[CONV_6_2:%.*]] = zext i16 [[TMP22]] to i32
+; CHECK-NEXT:    [[ADD_6_2:%.*]] = add nuw nsw i32 [[ADD_5_2]], [[CONV_6_2]]
+; CHECK-NEXT:    [[MUL_6_2:%.*]] = mul nuw nsw i32 [[CONV_6_2]], [[CONV_6_2]]
+; CHECK-NEXT:    [[ADD11_6_2:%.*]] = add i32 [[MUL_6_2]], [[ADD11_5_2]]
+; CHECK-NEXT:    [[ARRAYIDX_7_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 7
+; CHECK-NEXT:    [[TMP23:%.*]] = load i16, i16* [[ARRAYIDX_7_2]], align 2
+; CHECK-NEXT:    [[CONV_7_2:%.*]] = zext i16 [[TMP23]] to i32
+; CHECK-NEXT:    [[ADD_7_2:%.*]] = add nuw nsw i32 [[ADD_6_2]], [[CONV_7_2]]
+; CHECK-NEXT:    [[MUL_7_2:%.*]] = mul nuw nsw i32 [[CONV_7_2]], [[CONV_7_2]]
+; CHECK-NEXT:    [[ADD11_7_2:%.*]] = add i32 [[MUL_7_2]], [[ADD11_6_2]]
+; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2
+; CHECK-NEXT:    [[CONV_348:%.*]] = zext i16 [[TMP24]] to i32
+; CHECK-NEXT:    [[ADD_349:%.*]] = add nuw nsw i32 [[ADD_7_2]], [[CONV_348]]
+; CHECK-NEXT:    [[MUL_350:%.*]] = mul nuw nsw i32 [[CONV_348]], [[CONV_348]]
+; CHECK-NEXT:    [[ADD11_351:%.*]] = add i32 [[MUL_350]], [[ADD11_7_2]]
+; CHECK-NEXT:    [[ARRAYIDX_1_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 1
+; CHECK-NEXT:    [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX_1_3]], align 2
+; CHECK-NEXT:    [[CONV_1_3:%.*]] = zext i16 [[TMP25]] to i32
+; CHECK-NEXT:    [[ADD_1_3:%.*]] = add nuw nsw i32 [[ADD_349]], [[CONV_1_3]]
+; CHECK-NEXT:    [[MUL_1_3:%.*]] = mul nuw nsw i32 [[CONV_1_3]], [[CONV_1_3]]
+; CHECK-NEXT:    [[ADD11_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD11_351]]
+; CHECK-NEXT:    [[ARRAYIDX_2_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 2
+; CHECK-NEXT:    [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX_2_3]], align 2
+; CHECK-NEXT:    [[CONV_2_3:%.*]] = zext i16 [[TMP26]] to i32
+; CHECK-NEXT:    [[ADD_2_3:%.*]] = add nuw nsw i32 [[ADD_1_3]], [[CONV_2_3]]
+; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul nuw nsw i32 [[CONV_2_3]], [[CONV_2_3]]
+; CHECK-NEXT:    [[ADD11_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD11_1_3]]
+; CHECK-NEXT:    [[ARRAYIDX_3_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 3
+; CHECK-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX_3_3]], align 2
+; CHECK-NEXT:    [[CONV_3_3:%.*]] = zext i16 [[TMP27]] to i32
+; CHECK-NEXT:    [[ADD_3_3:%.*]] = add nuw nsw i32 [[ADD_2_3]], [[CONV_3_3]]
+; CHECK-NEXT:    [[MUL_3_3:%.*]] = mul nuw nsw i32 [[CONV_3_3]], [[CONV_3_3]]
+; CHECK-NEXT:    [[ADD11_3_3:%.*]] = add i32 [[MUL_3_3]], [[ADD11_2_3]]
+; CHECK-NEXT:    [[ARRAYIDX_4_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 4
+; CHECK-NEXT:    [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX_4_3]], align 2
+; CHECK-NEXT:    [[CONV_4_3:%.*]] = zext i16 [[TMP28]] to i32
+; CHECK-NEXT:    [[ADD_4_3:%.*]] = add nuw nsw i32 [[ADD_3_3]], [[CONV_4_3]]
+; CHECK-NEXT:    [[MUL_4_3:%.*]] = mul nuw nsw i32 [[CONV_4_3]], [[CONV_4_3]]
+; CHECK-NEXT:    [[ADD11_4_3:%.*]] = add i32 [[MUL_4_3]], [[ADD11_3_3]]
+; CHECK-NEXT:    [[ARRAYIDX_5_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 5
+; CHECK-NEXT:    [[TMP29:%.*]] = load i16, i16* [[ARRAYIDX_5_3]], align 2
+; CHECK-NEXT:    [[CONV_5_3:%.*]] = zext i16 [[TMP29]] to i32
+; CHECK-NEXT:    [[ADD_5_3:%.*]] = add nuw nsw i32 [[ADD_4_3]], [[CONV_5_3]]
+; CHECK-NEXT:    [[MUL_5_3:%.*]] = mul nuw nsw i32 [[CONV_5_3]], [[CONV_5_3]]
+; CHECK-NEXT:    [[ADD11_5_3:%.*]] = add i32 [[MUL_5_3]], [[ADD11_4_3]]
+; CHECK-NEXT:    [[ARRAYIDX_6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 6
+; CHECK-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX_6_3]], align 2
+; CHECK-NEXT:    [[CONV_6_3:%.*]] = zext i16 [[TMP30]] to i32
+; CHECK-NEXT:    [[ADD_6_3:%.*]] = add nuw nsw i32 [[ADD_5_3]], [[CONV_6_3]]
+; CHECK-NEXT:    [[MUL_6_3:%.*]] = mul nuw nsw i32 [[CONV_6_3]], [[CONV_6_3]]
+; CHECK-NEXT:    [[ADD11_6_3:%.*]] = add i32 [[MUL_6_3]], [[ADD11_5_3]]
+; CHECK-NEXT:    [[ARRAYIDX_7_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 7
+; CHECK-NEXT:    [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX_7_3]], align 2
+; CHECK-NEXT:    [[CONV_7_3:%.*]] = zext i16 [[TMP31]] to i32
+; CHECK-NEXT:    [[ADD_7_3:%.*]] = add nuw nsw i32 [[ADD_6_3]], [[CONV_7_3]]
+; CHECK-NEXT:    [[MUL_7_3:%.*]] = mul nuw nsw i32 [[CONV_7_3]], [[CONV_7_3]]
+; CHECK-NEXT:    [[ADD11_7_3:%.*]] = add i32 [[MUL_7_3]], [[ADD11_6_3]]
+; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP32:%.*]] = load i16, i16* [[ADD_PTR_3]], align 2
+; CHECK-NEXT:    [[CONV_452:%.*]] = zext i16 [[TMP32]] to i32
+; CHECK-NEXT:    [[ADD_453:%.*]] = add nuw nsw i32 [[ADD_7_3]], [[CONV_452]]
+; CHECK-NEXT:    [[MUL_454:%.*]] = mul nuw nsw i32 [[CONV_452]], [[CONV_452]]
+; CHECK-NEXT:    [[ADD11_455:%.*]] = add i32 [[MUL_454]], [[ADD11_7_3]]
+; CHECK-NEXT:    [[ARRAYIDX_1_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 1
+; CHECK-NEXT:    [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX_1_4]], align 2
+; CHECK-NEXT:    [[CONV_1_4:%.*]] = zext i16 [[TMP33]] to i32
+; CHECK-NEXT:    [[ADD_1_4:%.*]] = add nuw nsw i32 [[ADD_453]], [[CONV_1_4]]
+; CHECK-NEXT:    [[MUL_1_4:%.*]] = mul nuw nsw i32 [[CONV_1_4]], [[CONV_1_4]]
+; CHECK-NEXT:    [[ADD11_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD11_455]]
+; CHECK-NEXT:    [[ARRAYIDX_2_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 2
+; CHECK-NEXT:    [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX_2_4]], align 2
+; CHECK-NEXT:    [[CONV_2_4:%.*]] = zext i16 [[TMP34]] to i32
+; CHECK-NEXT:    [[ADD_2_4:%.*]] = add nuw nsw i32 [[ADD_1_4]], [[CONV_2_4]]
+; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul nuw nsw i32 [[CONV_2_4]], [[CONV_2_4]]
+; CHECK-NEXT:    [[ADD11_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD11_1_4]]
+; CHECK-NEXT:    [[ARRAYIDX_3_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 3
+; CHECK-NEXT:    [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX_3_4]], align 2
+; CHECK-NEXT:    [[CONV_3_4:%.*]] = zext i16 [[TMP35]] to i32
+; CHECK-NEXT:    [[ADD_3_4:%.*]] = add nuw nsw i32 [[ADD_2_4]], [[CONV_3_4]]
+; CHECK-NEXT:    [[MUL_3_4:%.*]] = mul nuw nsw i32 [[CONV_3_4]], [[CONV_3_4]]
+; CHECK-NEXT:    [[ADD11_3_4:%.*]] = add i32 [[MUL_3_4]], [[ADD11_2_4]]
+; CHECK-NEXT:    [[ARRAYIDX_4_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 4
+; CHECK-NEXT:    [[TMP36:%.*]] = load i16, i16* [[ARRAYIDX_4_4]], align 2
+; CHECK-NEXT:    [[CONV_4_4:%.*]] = zext i16 [[TMP36]] to i32
+; CHECK-NEXT:    [[ADD_4_4:%.*]] = add nuw nsw i32 [[ADD_3_4]], [[CONV_4_4]]
+; CHECK-NEXT:    [[MUL_4_4:%.*]] = mul nuw nsw i32 [[CONV_4_4]], [[CONV_4_4]]
+; CHECK-NEXT:    [[ADD11_4_4:%.*]] = add i32 [[MUL_4_4]], [[ADD11_3_4]]
+; CHECK-NEXT:    [[ARRAYIDX_5_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 5
+; CHECK-NEXT:    [[TMP37:%.*]] = load i16, i16* [[ARRAYIDX_5_4]], align 2
+; CHECK-NEXT:    [[CONV_5_4:%.*]] = zext i16 [[TMP37]] to i32
+; CHECK-NEXT:    [[ADD_5_4:%.*]] = add nuw nsw i32 [[ADD_4_4]], [[CONV_5_4]]
+; CHECK-NEXT:    [[MUL_5_4:%.*]] = mul nuw nsw i32 [[CONV_5_4]], [[CONV_5_4]]
+; CHECK-NEXT:    [[ADD11_5_4:%.*]] = add i32 [[MUL_5_4]], [[ADD11_4_4]]
+; CHECK-NEXT:    [[ARRAYIDX_6_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 6
+; CHECK-NEXT:    [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX_6_4]], align 2
+; CHECK-NEXT:    [[CONV_6_4:%.*]] = zext i16 [[TMP38]] to i32
+; CHECK-NEXT:    [[ADD_6_4:%.*]] = add nuw nsw i32 [[ADD_5_4]], [[CONV_6_4]]
+; CHECK-NEXT:    [[MUL_6_4:%.*]] = mul nuw nsw i32 [[CONV_6_4]], [[CONV_6_4]]
+; CHECK-NEXT:    [[ADD11_6_4:%.*]] = add i32 [[MUL_6_4]], [[ADD11_5_4]]
+; CHECK-NEXT:    [[ARRAYIDX_7_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 7
+; CHECK-NEXT:    [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX_7_4]], align 2
+; CHECK-NEXT:    [[CONV_7_4:%.*]] = zext i16 [[TMP39]] to i32
+; CHECK-NEXT:    [[ADD_7_4:%.*]] = add nuw nsw i32 [[ADD_6_4]], [[CONV_7_4]]
+; CHECK-NEXT:    [[MUL_7_4:%.*]] = mul nuw nsw i32 [[CONV_7_4]], [[CONV_7_4]]
+; CHECK-NEXT:    [[ADD11_7_4:%.*]] = add i32 [[MUL_7_4]], [[ADD11_6_4]]
+; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP40:%.*]] = load i16, i16* [[ADD_PTR_4]], align 2
+; CHECK-NEXT:    [[CONV_556:%.*]] = zext i16 [[TMP40]] to i32
+; CHECK-NEXT:    [[ADD_557:%.*]] = add nuw nsw i32 [[ADD_7_4]], [[CONV_556]]
+; CHECK-NEXT:    [[MUL_558:%.*]] = mul nuw nsw i32 [[CONV_556]], [[CONV_556]]
+; CHECK-NEXT:    [[ADD11_559:%.*]] = add i32 [[MUL_558]], [[ADD11_7_4]]
+; CHECK-NEXT:    [[ARRAYIDX_1_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 1
+; CHECK-NEXT:    [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX_1_5]], align 2
+; CHECK-NEXT:    [[CONV_1_5:%.*]] = zext i16 [[TMP41]] to i32
+; CHECK-NEXT:    [[ADD_1_5:%.*]] = add nuw nsw i32 [[ADD_557]], [[CONV_1_5]]
+; CHECK-NEXT:    [[MUL_1_5:%.*]] = mul nuw nsw i32 [[CONV_1_5]], [[CONV_1_5]]
+; CHECK-NEXT:    [[ADD11_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD11_559]]
+; CHECK-NEXT:    [[ARRAYIDX_2_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 2
+; CHECK-NEXT:    [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX_2_5]], align 2
+; CHECK-NEXT:    [[CONV_2_5:%.*]] = zext i16 [[TMP42]] to i32
+; CHECK-NEXT:    [[ADD_2_5:%.*]] = add nuw nsw i32 [[ADD_1_5]], [[CONV_2_5]]
+; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul nuw nsw i32 [[CONV_2_5]], [[CONV_2_5]]
+; CHECK-NEXT:    [[ADD11_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD11_1_5]]
+; CHECK-NEXT:    [[ARRAYIDX_3_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 3
+; CHECK-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX_3_5]], align 2
+; CHECK-NEXT:    [[CONV_3_5:%.*]] = zext i16 [[TMP43]] to i32
+; CHECK-NEXT:    [[ADD_3_5:%.*]] = add nuw nsw i32 [[ADD_2_5]], [[CONV_3_5]]
+; CHECK-NEXT:    [[MUL_3_5:%.*]] = mul nuw nsw i32 [[CONV_3_5]], [[CONV_3_5]]
+; CHECK-NEXT:    [[ADD11_3_5:%.*]] = add i32 [[MUL_3_5]], [[ADD11_2_5]]
+; CHECK-NEXT:    [[ARRAYIDX_4_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 4
+; CHECK-NEXT:    [[TMP44:%.*]] = load i16, i16* [[ARRAYIDX_4_5]], align 2
+; CHECK-NEXT:    [[CONV_4_5:%.*]] = zext i16 [[TMP44]] to i32
+; CHECK-NEXT:    [[ADD_4_5:%.*]] = add nuw nsw i32 [[ADD_3_5]], [[CONV_4_5]]
+; CHECK-NEXT:    [[MUL_4_5:%.*]] = mul nuw nsw i32 [[CONV_4_5]], [[CONV_4_5]]
+; CHECK-NEXT:    [[ADD11_4_5:%.*]] = add i32 [[MUL_4_5]], [[ADD11_3_5]]
+; CHECK-NEXT:    [[ARRAYIDX_5_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 5
+; CHECK-NEXT:    [[TMP45:%.*]] = load i16, i16* [[ARRAYIDX_5_5]], align 2
+; CHECK-NEXT:    [[CONV_5_5:%.*]] = zext i16 [[TMP45]] to i32
+; CHECK-NEXT:    [[ADD_5_5:%.*]] = add nuw nsw i32 [[ADD_4_5]], [[CONV_5_5]]
+; CHECK-NEXT:    [[MUL_5_5:%.*]] = mul nuw nsw i32 [[CONV_5_5]], [[CONV_5_5]]
+; CHECK-NEXT:    [[ADD11_5_5:%.*]] = add i32 [[MUL_5_5]], [[ADD11_4_5]]
+; CHECK-NEXT:    [[ARRAYIDX_6_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 6
+; CHECK-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX_6_5]], align 2
+; CHECK-NEXT:    [[CONV_6_5:%.*]] = zext i16 [[TMP46]] to i32
+; CHECK-NEXT:    [[ADD_6_5:%.*]] = add nuw nsw i32 [[ADD_5_5]], [[CONV_6_5]]
+; CHECK-NEXT:    [[MUL_6_5:%.*]] = mul nuw nsw i32 [[CONV_6_5]], [[CONV_6_5]]
+; CHECK-NEXT:    [[ADD11_6_5:%.*]] = add i32 [[MUL_6_5]], [[ADD11_5_5]]
+; CHECK-NEXT:    [[ARRAYIDX_7_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 7
+; CHECK-NEXT:    [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX_7_5]], align 2
+; CHECK-NEXT:    [[CONV_7_5:%.*]] = zext i16 [[TMP47]] to i32
+; CHECK-NEXT:    [[ADD_7_5:%.*]] = add nuw nsw i32 [[ADD_6_5]], [[CONV_7_5]]
+; CHECK-NEXT:    [[MUL_7_5:%.*]] = mul nuw nsw i32 [[CONV_7_5]], [[CONV_7_5]]
+; CHECK-NEXT:    [[ADD11_7_5:%.*]] = add i32 [[MUL_7_5]], [[ADD11_6_5]]
+; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP48:%.*]] = load i16, i16* [[ADD_PTR_5]], align 2
+; CHECK-NEXT:    [[CONV_660:%.*]] = zext i16 [[TMP48]] to i32
+; CHECK-NEXT:    [[ADD_661:%.*]] = add nuw nsw i32 [[ADD_7_5]], [[CONV_660]]
+; CHECK-NEXT:    [[MUL_662:%.*]] = mul nuw nsw i32 [[CONV_660]], [[CONV_660]]
+; CHECK-NEXT:    [[ADD11_663:%.*]] = add i32 [[MUL_662]], [[ADD11_7_5]]
+; CHECK-NEXT:    [[ARRAYIDX_1_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 1
+; CHECK-NEXT:    [[TMP49:%.*]] = load i16, i16* [[ARRAYIDX_1_6]], align 2
+; CHECK-NEXT:    [[CONV_1_6:%.*]] = zext i16 [[TMP49]] to i32
+; CHECK-NEXT:    [[ADD_1_6:%.*]] = add nuw nsw i32 [[ADD_661]], [[CONV_1_6]]
+; CHECK-NEXT:    [[MUL_1_6:%.*]] = mul nuw nsw i32 [[CONV_1_6]], [[CONV_1_6]]
+; CHECK-NEXT:    [[ADD11_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD11_663]]
+; CHECK-NEXT:    [[ARRAYIDX_2_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 2
+; CHECK-NEXT:    [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_2_6]], align 2
+; CHECK-NEXT:    [[CONV_2_6:%.*]] = zext i16 [[TMP50]] to i32
+; CHECK-NEXT:    [[ADD_2_6:%.*]] = add nuw nsw i32 [[ADD_1_6]], [[CONV_2_6]]
+; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul nuw nsw i32 [[CONV_2_6]], [[CONV_2_6]]
+; CHECK-NEXT:    [[ADD11_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD11_1_6]]
+; CHECK-NEXT:    [[ARRAYIDX_3_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 3
+; CHECK-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX_3_6]], align 2
+; CHECK-NEXT:    [[CONV_3_6:%.*]] = zext i16 [[TMP51]] to i32
+; CHECK-NEXT:    [[ADD_3_6:%.*]] = add nuw nsw i32 [[ADD_2_6]], [[CONV_3_6]]
+; CHECK-NEXT:    [[MUL_3_6:%.*]] = mul nuw nsw i32 [[CONV_3_6]], [[CONV_3_6]]
+; CHECK-NEXT:    [[ADD11_3_6:%.*]] = add i32 [[MUL_3_6]], [[ADD11_2_6]]
+; CHECK-NEXT:    [[ARRAYIDX_4_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 4
+; CHECK-NEXT:    [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX_4_6]], align 2
+; CHECK-NEXT:    [[CONV_4_6:%.*]] = zext i16 [[TMP52]] to i32
+; CHECK-NEXT:    [[ADD_4_6:%.*]] = add nuw nsw i32 [[ADD_3_6]], [[CONV_4_6]]
+; CHECK-NEXT:    [[MUL_4_6:%.*]] = mul nuw nsw i32 [[CONV_4_6]], [[CONV_4_6]]
+; CHECK-NEXT:    [[ADD11_4_6:%.*]] = add i32 [[MUL_4_6]], [[ADD11_3_6]]
+; CHECK-NEXT:    [[ARRAYIDX_5_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 5
+; CHECK-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX_5_6]], align 2
+; CHECK-NEXT:    [[CONV_5_6:%.*]] = zext i16 [[TMP53]] to i32
+; CHECK-NEXT:    [[ADD_5_6:%.*]] = add nuw nsw i32 [[ADD_4_6]], [[CONV_5_6]]
+; CHECK-NEXT:    [[MUL_5_6:%.*]] = mul nuw nsw i32 [[CONV_5_6]], [[CONV_5_6]]
+; CHECK-NEXT:    [[ADD11_5_6:%.*]] = add i32 [[MUL_5_6]], [[ADD11_4_6]]
+; CHECK-NEXT:    [[ARRAYIDX_6_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 6
+; CHECK-NEXT:    [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX_6_6]], align 2
+; CHECK-NEXT:    [[CONV_6_6:%.*]] = zext i16 [[TMP54]] to i32
+; CHECK-NEXT:    [[ADD_6_6:%.*]] = add nuw nsw i32 [[ADD_5_6]], [[CONV_6_6]]
+; CHECK-NEXT:    [[MUL_6_6:%.*]] = mul nuw nsw i32 [[CONV_6_6]], [[CONV_6_6]]
+; CHECK-NEXT:    [[ADD11_6_6:%.*]] = add i32 [[MUL_6_6]], [[ADD11_5_6]]
+; CHECK-NEXT:    [[ARRAYIDX_7_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 7
+; CHECK-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX_7_6]], align 2
+; CHECK-NEXT:    [[CONV_7_6:%.*]] = zext i16 [[TMP55]] to i32
+; CHECK-NEXT:    [[ADD_7_6:%.*]] = add nuw nsw i32 [[ADD_6_6]], [[CONV_7_6]]
+; CHECK-NEXT:    [[MUL_7_6:%.*]] = mul nuw nsw i32 [[CONV_7_6]], [[CONV_7_6]]
+; CHECK-NEXT:    [[ADD11_7_6:%.*]] = add i32 [[MUL_7_6]], [[ADD11_6_6]]
+; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[TMP56:%.*]] = load i16, i16* [[ADD_PTR_6]], align 2
+; CHECK-NEXT:    [[CONV_764:%.*]] = zext i16 [[TMP56]] to i32
+; CHECK-NEXT:    [[ADD_765:%.*]] = add nuw nsw i32 [[ADD_7_6]], [[CONV_764]]
+; CHECK-NEXT:    [[MUL_766:%.*]] = mul nuw nsw i32 [[CONV_764]], [[CONV_764]]
+; CHECK-NEXT:    [[ADD11_767:%.*]] = add i32 [[MUL_766]], [[ADD11_7_6]]
+; CHECK-NEXT:    [[ARRAYIDX_1_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 1
+; CHECK-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX_1_7]], align 2
+; CHECK-NEXT:    [[CONV_1_7:%.*]] = zext i16 [[TMP57]] to i32
+; CHECK-NEXT:    [[ADD_1_7:%.*]] = add nuw nsw i32 [[ADD_765]], [[CONV_1_7]]
+; CHECK-NEXT:    [[MUL_1_7:%.*]] = mul nuw nsw i32 [[CONV_1_7]], [[CONV_1_7]]
+; CHECK-NEXT:    [[ADD11_1_7:%.*]] = add i32 [[MUL_1_7]], [[ADD11_767]]
+; CHECK-NEXT:    [[ARRAYIDX_2_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 2
+; CHECK-NEXT:    [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX_2_7]], align 2
+; CHECK-NEXT:    [[CONV_2_7:%.*]] = zext i16 [[TMP58]] to i32
+; CHECK-NEXT:    [[ADD_2_7:%.*]] = add nuw nsw i32 [[ADD_1_7]], [[CONV_2_7]]
+; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul nuw nsw i32 [[CONV_2_7]], [[CONV_2_7]]
+; CHECK-NEXT:    [[ADD11_2_7:%.*]] = add i32 [[MUL_2_7]], [[ADD11_1_7]]
+; CHECK-NEXT:    [[ARRAYIDX_3_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 3
+; CHECK-NEXT:    [[TMP59:%.*]] = load i16, i16* [[ARRAYIDX_3_7]], align 2
+; CHECK-NEXT:    [[CONV_3_7:%.*]] = zext i16 [[TMP59]] to i32
+; CHECK-NEXT:    [[ADD_3_7:%.*]] = add nuw nsw i32 [[ADD_2_7]], [[CONV_3_7]]
+; CHECK-NEXT:    [[MUL_3_7:%.*]] = mul nuw nsw i32 [[CONV_3_7]], [[CONV_3_7]]
+; CHECK-NEXT:    [[ADD11_3_7:%.*]] = add i32 [[MUL_3_7]], [[ADD11_2_7]]
+; CHECK-NEXT:    [[ARRAYIDX_4_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 4
+; CHECK-NEXT:    [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX_4_7]], align 2
+; CHECK-NEXT:    [[CONV_4_7:%.*]] = zext i16 [[TMP60]] to i32
+; CHECK-NEXT:    [[ADD_4_7:%.*]] = add nuw nsw i32 [[ADD_3_7]], [[CONV_4_7]]
+; CHECK-NEXT:    [[MUL_4_7:%.*]] = mul nuw nsw i32 [[CONV_4_7]], [[CONV_4_7]]
+; CHECK-NEXT:    [[ADD11_4_7:%.*]] = add i32 [[MUL_4_7]], [[ADD11_3_7]]
+; CHECK-NEXT:    [[ARRAYIDX_5_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 5
+; CHECK-NEXT:    [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX_5_7]], align 2
+; CHECK-NEXT:    [[CONV_5_7:%.*]] = zext i16 [[TMP61]] to i32
+; CHECK-NEXT:    [[ADD_5_7:%.*]] = add nuw nsw i32 [[ADD_4_7]], [[CONV_5_7]]
+; CHECK-NEXT:    [[MUL_5_7:%.*]] = mul nuw nsw i32 [[CONV_5_7]], [[CONV_5_7]]
+; CHECK-NEXT:    [[ADD11_5_7:%.*]] = add i32 [[MUL_5_7]], [[ADD11_4_7]]
+; CHECK-NEXT:    [[ARRAYIDX_6_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 6
+; CHECK-NEXT:    [[TMP62:%.*]] = load i16, i16* [[ARRAYIDX_6_7]], align 2
+; CHECK-NEXT:    [[CONV_6_7:%.*]] = zext i16 [[TMP62]] to i32
+; CHECK-NEXT:    [[ADD_6_7:%.*]] = add nuw nsw i32 [[ADD_5_7]], [[CONV_6_7]]
+; CHECK-NEXT:    [[MUL_6_7:%.*]] = mul nuw nsw i32 [[CONV_6_7]], [[CONV_6_7]]
+; CHECK-NEXT:    [[ADD11_6_7:%.*]] = add i32 [[MUL_6_7]], [[ADD11_5_7]]
+; CHECK-NEXT:    [[ARRAYIDX_7_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 7
+; CHECK-NEXT:    [[TMP63:%.*]] = load i16, i16* [[ARRAYIDX_7_7]], align 2
+; CHECK-NEXT:    [[CONV_7_7:%.*]] = zext i16 [[TMP63]] to i32
+; CHECK-NEXT:    [[ADD_7_7:%.*]] = add nuw nsw i32 [[ADD_6_7]], [[CONV_7_7]]
+; CHECK-NEXT:    [[MUL_7_7:%.*]] = mul nuw nsw i32 [[CONV_7_7]], [[CONV_7_7]]
+; CHECK-NEXT:    [[ADD11_7_7:%.*]] = add i32 [[MUL_7_7]], [[ADD11_6_7]]
+; CHECK-NEXT:    [[CONV15:%.*]] = zext i32 [[ADD_7_7]] to i64
+; CHECK-NEXT:    [[CONV16:%.*]] = zext i32 [[ADD11_7_7]] to i64
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i64 [[CONV16]], 32
+; CHECK-NEXT:    [[ADD17:%.*]] = or i64 [[SHL]], [[CONV15]]
+; CHECK-NEXT:    ret i64 [[ADD17]]
+;
+entry:
+  %idx.ext = sext i32 %st to i64
+  %0 = load i16, i16* %p, align 2
+  %conv = zext i16 %0 to i32
+  %mul = mul nuw nsw i32 %conv, %conv
+  %arrayidx.1 = getelementptr inbounds i16, i16* %p, i64 1
+  %1 = load i16, i16* %arrayidx.1, align 2
+  %conv.1 = zext i16 %1 to i32
+  %add.1 = add nuw nsw i32 %conv, %conv.1
+  %mul.1 = mul nuw nsw i32 %conv.1, %conv.1
+  %add11.1 = add nuw i32 %mul.1, %mul
+  %arrayidx.2 = getelementptr inbounds i16, i16* %p, i64 2
+  %2 = load i16, i16* %arrayidx.2, align 2
+  %conv.2 = zext i16 %2 to i32
+  %add.2 = add nuw nsw i32 %add.1, %conv.2
+  %mul.2 = mul nuw nsw i32 %conv.2, %conv.2
+  %add11.2 = add i32 %mul.2, %add11.1
+  %arrayidx.3 = getelementptr inbounds i16, i16* %p, i64 3
+  %3 = load i16, i16* %arrayidx.3, align 2
+  %conv.3 = zext i16 %3 to i32
+  %add.3 = add nuw nsw i32 %add.2, %conv.3
+  %mul.3 = mul nuw nsw i32 %conv.3, %conv.3
+  %add11.3 = add i32 %mul.3, %add11.2
+  %arrayidx.4 = getelementptr inbounds i16, i16* %p, i64 4
+  %4 = load i16, i16* %arrayidx.4, align 2
+  %conv.4 = zext i16 %4 to i32
+  %add.4 = add nuw nsw i32 %add.3, %conv.4
+  %mul.4 = mul nuw nsw i32 %conv.4, %conv.4
+  %add11.4 = add i32 %mul.4, %add11.3
+  %arrayidx.5 = getelementptr inbounds i16, i16* %p, i64 5
+  %5 = load i16, i16* %arrayidx.5, align 2
+  %conv.5 = zext i16 %5 to i32
+  %add.5 = add nuw nsw i32 %add.4, %conv.5
+  %mul.5 = mul nuw nsw i32 %conv.5, %conv.5
+  %add11.5 = add i32 %mul.5, %add11.4
+  %arrayidx.6 = getelementptr inbounds i16, i16* %p, i64 6
+  %6 = load i16, i16* %arrayidx.6, align 2
+  %conv.6 = zext i16 %6 to i32
+  %add.6 = add nuw nsw i32 %add.5, %conv.6
+  %mul.6 = mul nuw nsw i32 %conv.6, %conv.6
+  %add11.6 = add i32 %mul.6, %add11.5
+  %arrayidx.7 = getelementptr inbounds i16, i16* %p, i64 7
+  %7 = load i16, i16* %arrayidx.7, align 2
+  %conv.7 = zext i16 %7 to i32
+  %add.7 = add nuw nsw i32 %add.6, %conv.7
+  %mul.7 = mul nuw nsw i32 %conv.7, %conv.7
+  %add11.7 = add i32 %mul.7, %add11.6
+  %add.ptr = getelementptr inbounds i16, i16* %p, i64 %idx.ext
+  %8 = load i16, i16* %add.ptr, align 2
+  %conv.140 = zext i16 %8 to i32
+  %add.141 = add nuw nsw i32 %add.7, %conv.140
+  %mul.142 = mul nuw nsw i32 %conv.140, %conv.140
+  %add11.143 = add i32 %mul.142, %add11.7
+  %arrayidx.1.1 = getelementptr inbounds i16, i16* %add.ptr, i64 1
+  %9 = load i16, i16* %arrayidx.1.1, align 2
+  %conv.1.1 = zext i16 %9 to i32
+  %add.1.1 = add nuw nsw i32 %add.141, %conv.1.1
+  %mul.1.1 = mul nuw nsw i32 %conv.1.1, %conv.1.1
+  %add11.1.1 = add i32 %mul.1.1, %add11.143
+  %arrayidx.2.1 = getelementptr inbounds i16, i16* %add.ptr, i64 2
+  %10 = load i16, i16* %arrayidx.2.1, align 2
+  %conv.2.1 = zext i16 %10 to i32
+  %add.2.1 = add nuw nsw i32 %add.1.1, %conv.2.1
+  %mul.2.1 = mul nuw nsw i32 %conv.2.1, %conv.2.1
+  %add11.2.1 = add i32 %mul.2.1, %add11.1.1
+  %arrayidx.3.1 = getelementptr inbounds i16, i16* %add.ptr, i64 3
+  %11 = load i16, i16* %arrayidx.3.1, align 2
+  %conv.3.1 = zext i16 %11 to i32
+  %add.3.1 = add nuw nsw i32 %add.2.1, %conv.3.1
+  %mul.3.1 = mul nuw nsw i32 %conv.3.1, %conv.3.1
+  %add11.3.1 = add i32 %mul.3.1, %add11.2.1
+  %arrayidx.4.1 = getelementptr inbounds i16, i16* %add.ptr, i64 4
+  %12 = load i16, i16* %arrayidx.4.1, align 2
+  %conv.4.1 = zext i16 %12 to i32
+  %add.4.1 = add nuw nsw i32 %add.3.1, %conv.4.1
+  %mul.4.1 = mul nuw nsw i32 %conv.4.1, %conv.4.1
+  %add11.4.1 = add i32 %mul.4.1, %add11.3.1
+  %arrayidx.5.1 = getelementptr inbounds i16, i16* %add.ptr, i64 5
+  %13 = load i16, i16* %arrayidx.5.1, align 2
+  %conv.5.1 = zext i16 %13 to i32
+  %add.5.1 = add nuw nsw i32 %add.4.1, %conv.5.1
+  %mul.5.1 = mul nuw nsw i32 %conv.5.1, %conv.5.1
+  %add11.5.1 = add i32 %mul.5.1, %add11.4.1
+  %arrayidx.6.1 = getelementptr inbounds i16, i16* %add.ptr, i64 6
+  %14 = load i16, i16* %arrayidx.6.1, align 2
+  %conv.6.1 = zext i16 %14 to i32
+  %add.6.1 = add nuw nsw i32 %add.5.1, %conv.6.1
+  %mul.6.1 = mul nuw nsw i32 %conv.6.1, %conv.6.1
+  %add11.6.1 = add i32 %mul.6.1, %add11.5.1
+  %arrayidx.7.1 = getelementptr inbounds i16, i16* %add.ptr, i64 7
+  %15 = load i16, i16* %arrayidx.7.1, align 2
+  %conv.7.1 = zext i16 %15 to i32
+  %add.7.1 = add nuw nsw i32 %add.6.1, %conv.7.1
+  %mul.7.1 = mul nuw nsw i32 %conv.7.1, %conv.7.1
+  %add11.7.1 = add i32 %mul.7.1, %add11.6.1
+  %add.ptr.1 = getelementptr inbounds i16, i16* %add.ptr, i64 %idx.ext
+  %16 = load i16, i16* %add.ptr.1, align 2
+  %conv.244 = zext i16 %16 to i32
+  %add.245 = add nuw nsw i32 %add.7.1, %conv.244
+  %mul.246 = mul nuw nsw i32 %conv.244, %conv.244
+  %add11.247 = add i32 %mul.246, %add11.7.1
+  %arrayidx.1.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 1
+  %17 = load i16, i16* %arrayidx.1.2, align 2
+  %conv.1.2 = zext i16 %17 to i32
+  %add.1.2 = add nuw nsw i32 %add.245, %conv.1.2
+  %mul.1.2 = mul nuw nsw i32 %conv.1.2, %conv.1.2
+  %add11.1.2 = add i32 %mul.1.2, %add11.247
+  %arrayidx.2.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 2
+  %18 = load i16, i16* %arrayidx.2.2, align 2
+  %conv.2.2 = zext i16 %18 to i32
+  %add.2.2 = add nuw nsw i32 %add.1.2, %conv.2.2
+  %mul.2.2 = mul nuw nsw i32 %conv.2.2, %conv.2.2
+  %add11.2.2 = add i32 %mul.2.2, %add11.1.2
+  %arrayidx.3.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 3
+  %19 = load i16, i16* %arrayidx.3.2, align 2
+  %conv.3.2 = zext i16 %19 to i32
+  %add.3.2 = add nuw nsw i32 %add.2.2, %conv.3.2
+  %mul.3.2 = mul nuw nsw i32 %conv.3.2, %conv.3.2
+  %add11.3.2 = add i32 %mul.3.2, %add11.2.2
+  %arrayidx.4.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 4
+  %20 = load i16, i16* %arrayidx.4.2, align 2
+  %conv.4.2 = zext i16 %20 to i32
+  %add.4.2 = add nuw nsw i32 %add.3.2, %conv.4.2
+  %mul.4.2 = mul nuw nsw i32 %conv.4.2, %conv.4.2
+  %add11.4.2 = add i32 %mul.4.2, %add11.3.2
+  %arrayidx.5.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 5
+  %21 = load i16, i16* %arrayidx.5.2, align 2
+  %conv.5.2 = zext i16 %21 to i32
+  %add.5.2 = add nuw nsw i32 %add.4.2, %conv.5.2
+  %mul.5.2 = mul nuw nsw i32 %conv.5.2, %conv.5.2
+  %add11.5.2 = add i32 %mul.5.2, %add11.4.2
+  %arrayidx.6.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 6
+  %22 = load i16, i16* %arrayidx.6.2, align 2
+  %conv.6.2 = zext i16 %22 to i32
+  %add.6.2 = add nuw nsw i32 %add.5.2, %conv.6.2
+  %mul.6.2 = mul nuw nsw i32 %conv.6.2, %conv.6.2
+  %add11.6.2 = add i32 %mul.6.2, %add11.5.2
+  %arrayidx.7.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 7
+  %23 = load i16, i16* %arrayidx.7.2, align 2
+  %conv.7.2 = zext i16 %23 to i32
+  %add.7.2 = add nuw nsw i32 %add.6.2, %conv.7.2
+  %mul.7.2 = mul nuw nsw i32 %conv.7.2, %conv.7.2
+  %add11.7.2 = add i32 %mul.7.2, %add11.6.2
+  %add.ptr.2 = getelementptr inbounds i16, i16* %add.ptr.1, i64 %idx.ext
+  %24 = load i16, i16* %add.ptr.2, align 2
+  %conv.348 = zext i16 %24 to i32
+  %add.349 = add nuw nsw i32 %add.7.2, %conv.348
+  %mul.350 = mul nuw nsw i32 %conv.348, %conv.348
+  %add11.351 = add i32 %mul.350, %add11.7.2
+  %arrayidx.1.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 1
+  %25 = load i16, i16* %arrayidx.1.3, align 2
+  %conv.1.3 = zext i16 %25 to i32
+  %add.1.3 = add nuw nsw i32 %add.349, %conv.1.3
+  %mul.1.3 = mul nuw nsw i32 %conv.1.3, %conv.1.3
+  %add11.1.3 = add i32 %mul.1.3, %add11.351
+  %arrayidx.2.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 2
+  %26 = load i16, i16* %arrayidx.2.3, align 2
+  %conv.2.3 = zext i16 %26 to i32
+  %add.2.3 = add nuw nsw i32 %add.1.3, %conv.2.3
+  %mul.2.3 = mul nuw nsw i32 %conv.2.3, %conv.2.3
+  %add11.2.3 = add i32 %mul.2.3, %add11.1.3
+  %arrayidx.3.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 3
+  %27 = load i16, i16* %arrayidx.3.3, align 2
+  %conv.3.3 = zext i16 %27 to i32
+  %add.3.3 = add nuw nsw i32 %add.2.3, %conv.3.3
+  %mul.3.3 = mul nuw nsw i32 %conv.3.3, %conv.3.3
+  %add11.3.3 = add i32 %mul.3.3, %add11.2.3
+  %arrayidx.4.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 4
+  %28 = load i16, i16* %arrayidx.4.3, align 2
+  %conv.4.3 = zext i16 %28 to i32
+  %add.4.3 = add nuw nsw i32 %add.3.3, %conv.4.3
+  %mul.4.3 = mul nuw nsw i32 %conv.4.3, %conv.4.3
+  %add11.4.3 = add i32 %mul.4.3, %add11.3.3
+  %arrayidx.5.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 5
+  %29 = load i16, i16* %arrayidx.5.3, align 2
+  %conv.5.3 = zext i16 %29 to i32
+  %add.5.3 = add nuw nsw i32 %add.4.3, %conv.5.3
+  %mul.5.3 = mul nuw nsw i32 %conv.5.3, %conv.5.3
+  %add11.5.3 = add i32 %mul.5.3, %add11.4.3
+  %arrayidx.6.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 6
+  %30 = load i16, i16* %arrayidx.6.3, align 2
+  %conv.6.3 = zext i16 %30 to i32
+  %add.6.3 = add nuw nsw i32 %add.5.3, %conv.6.3
+  %mul.6.3 = mul nuw nsw i32 %conv.6.3, %conv.6.3
+  %add11.6.3 = add i32 %mul.6.3, %add11.5.3
+  %arrayidx.7.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 7
+  %31 = load i16, i16* %arrayidx.7.3, align 2
+  %conv.7.3 = zext i16 %31 to i32
+  %add.7.3 = add nuw nsw i32 %add.6.3, %conv.7.3
+  %mul.7.3 = mul nuw nsw i32 %conv.7.3, %conv.7.3
+  %add11.7.3 = add i32 %mul.7.3, %add11.6.3
+  %add.ptr.3 = getelementptr inbounds i16, i16* %add.ptr.2, i64 %idx.ext
+  %32 = load i16, i16* %add.ptr.3, align 2
+  %conv.452 = zext i16 %32 to i32
+  %add.453 = add nuw nsw i32 %add.7.3, %conv.452
+  %mul.454 = mul nuw nsw i32 %conv.452, %conv.452
+  %add11.455 = add i32 %mul.454, %add11.7.3
+  %arrayidx.1.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 1
+  %33 = load i16, i16* %arrayidx.1.4, align 2
+  %conv.1.4 = zext i16 %33 to i32
+  %add.1.4 = add nuw nsw i32 %add.453, %conv.1.4
+  %mul.1.4 = mul nuw nsw i32 %conv.1.4, %conv.1.4
+  %add11.1.4 = add i32 %mul.1.4, %add11.455
+  %arrayidx.2.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 2
+  %34 = load i16, i16* %arrayidx.2.4, align 2
+  %conv.2.4 = zext i16 %34 to i32
+  %add.2.4 = add nuw nsw i32 %add.1.4, %conv.2.4
+  %mul.2.4 = mul nuw nsw i32 %conv.2.4, %conv.2.4
+  %add11.2.4 = add i32 %mul.2.4, %add11.1.4
+  %arrayidx.3.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 3
+  %35 = load i16, i16* %arrayidx.3.4, align 2
+  %conv.3.4 = zext i16 %35 to i32
+  %add.3.4 = add nuw nsw i32 %add.2.4, %conv.3.4
+  %mul.3.4 = mul nuw nsw i32 %conv.3.4, %conv.3.4
+  %add11.3.4 = add i32 %mul.3.4, %add11.2.4
+  %arrayidx.4.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 4
+  %36 = load i16, i16* %arrayidx.4.4, align 2
+  %conv.4.4 = zext i16 %36 to i32
+  %add.4.4 = add nuw nsw i32 %add.3.4, %conv.4.4
+  %mul.4.4 = mul nuw nsw i32 %conv.4.4, %conv.4.4
+  %add11.4.4 = add i32 %mul.4.4, %add11.3.4
+  %arrayidx.5.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 5
+  %37 = load i16, i16* %arrayidx.5.4, align 2
+  %conv.5.4 = zext i16 %37 to i32
+  %add.5.4 = add nuw nsw i32 %add.4.4, %conv.5.4
+  %mul.5.4 = mul nuw nsw i32 %conv.5.4, %conv.5.4
+  %add11.5.4 = add i32 %mul.5.4, %add11.4.4
+  %arrayidx.6.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 6
+  %38 = load i16, i16* %arrayidx.6.4, align 2
+  %conv.6.4 = zext i16 %38 to i32
+  %add.6.4 = add nuw nsw i32 %add.5.4, %conv.6.4
+  %mul.6.4 = mul nuw nsw i32 %conv.6.4, %conv.6.4
+  %add11.6.4 = add i32 %mul.6.4, %add11.5.4
+  %arrayidx.7.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 7
+  %39 = load i16, i16* %arrayidx.7.4, align 2
+  %conv.7.4 = zext i16 %39 to i32
+  %add.7.4 = add nuw nsw i32 %add.6.4, %conv.7.4
+  %mul.7.4 = mul nuw nsw i32 %conv.7.4, %conv.7.4
+  %add11.7.4 = add i32 %mul.7.4, %add11.6.4
+  %add.ptr.4 = getelementptr inbounds i16, i16* %add.ptr.3, i64 %idx.ext
+  %40 = load i16, i16* %add.ptr.4, align 2
+  %conv.556 = zext i16 %40 to i32
+  %add.557 = add nuw nsw i32 %add.7.4, %conv.556
+  %mul.558 = mul nuw nsw i32 %conv.556, %conv.556
+  %add11.559 = add i32 %mul.558, %add11.7.4
+  %arrayidx.1.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 1
+  %41 = load i16, i16* %arrayidx.1.5, align 2
+  %conv.1.5 = zext i16 %41 to i32
+  %add.1.5 = add nuw nsw i32 %add.557, %conv.1.5
+  %mul.1.5 = mul nuw nsw i32 %conv.1.5, %conv.1.5
+  %add11.1.5 = add i32 %mul.1.5, %add11.559
+  %arrayidx.2.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 2
+  %42 = load i16, i16* %arrayidx.2.5, align 2
+  %conv.2.5 = zext i16 %42 to i32
+  %add.2.5 = add nuw nsw i32 %add.1.5, %conv.2.5
+  %mul.2.5 = mul nuw nsw i32 %conv.2.5, %conv.2.5
+  %add11.2.5 = add i32 %mul.2.5, %add11.1.5
+  %arrayidx.3.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 3
+  %43 = load i16, i16* %arrayidx.3.5, align 2
+  %conv.3.5 = zext i16 %43 to i32
+  %add.3.5 = add nuw nsw i32 %add.2.5, %conv.3.5
+  %mul.3.5 = mul nuw nsw i32 %conv.3.5, %conv.3.5
+  %add11.3.5 = add i32 %mul.3.5, %add11.2.5
+  %arrayidx.4.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 4
+  %44 = load i16, i16* %arrayidx.4.5, align 2
+  %conv.4.5 = zext i16 %44 to i32
+  %add.4.5 = add nuw nsw i32 %add.3.5, %conv.4.5
+  %mul.4.5 = mul nuw nsw i32 %conv.4.5, %conv.4.5
+  %add11.4.5 = add i32 %mul.4.5, %add11.3.5
+  %arrayidx.5.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 5
+  %45 = load i16, i16* %arrayidx.5.5, align 2
+  %conv.5.5 = zext i16 %45 to i32
+  %add.5.5 = add nuw nsw i32 %add.4.5, %conv.5.5
+  %mul.5.5 = mul nuw nsw i32 %conv.5.5, %conv.5.5
+  %add11.5.5 = add i32 %mul.5.5, %add11.4.5
+  %arrayidx.6.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 6
+  %46 = load i16, i16* %arrayidx.6.5, align 2
+  %conv.6.5 = zext i16 %46 to i32
+  %add.6.5 = add nuw nsw i32 %add.5.5, %conv.6.5
+  %mul.6.5 = mul nuw nsw i32 %conv.6.5, %conv.6.5
+  %add11.6.5 = add i32 %mul.6.5, %add11.5.5
+  %arrayidx.7.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 7
+  %47 = load i16, i16* %arrayidx.7.5, align 2
+  %conv.7.5 = zext i16 %47 to i32
+  %add.7.5 = add nuw nsw i32 %add.6.5, %conv.7.5
+  %mul.7.5 = mul nuw nsw i32 %conv.7.5, %conv.7.5
+  %add11.7.5 = add i32 %mul.7.5, %add11.6.5
+  %add.ptr.5 = getelementptr inbounds i16, i16* %add.ptr.4, i64 %idx.ext
+  %48 = load i16, i16* %add.ptr.5, align 2
+  %conv.660 = zext i16 %48 to i32
+  %add.661 = add nuw nsw i32 %add.7.5, %conv.660
+  %mul.662 = mul nuw nsw i32 %conv.660, %conv.660
+  %add11.663 = add i32 %mul.662, %add11.7.5
+  %arrayidx.1.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 1
+  %49 = load i16, i16* %arrayidx.1.6, align 2
+  %conv.1.6 = zext i16 %49 to i32
+  %add.1.6 = add nuw nsw i32 %add.661, %conv.1.6
+  %mul.1.6 = mul nuw nsw i32 %conv.1.6, %conv.1.6
+  %add11.1.6 = add i32 %mul.1.6, %add11.663
+  %arrayidx.2.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 2
+  %50 = load i16, i16* %arrayidx.2.6, align 2
+  %conv.2.6 = zext i16 %50 to i32
+  %add.2.6 = add nuw nsw i32 %add.1.6, %conv.2.6
+  %mul.2.6 = mul nuw nsw i32 %conv.2.6, %conv.2.6
+  %add11.2.6 = add i32 %mul.2.6, %add11.1.6
+  %arrayidx.3.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 3
+  %51 = load i16, i16* %arrayidx.3.6, align 2
+  %conv.3.6 = zext i16 %51 to i32
+  %add.3.6 = add nuw nsw i32 %add.2.6, %conv.3.6
+  %mul.3.6 = mul nuw nsw i32 %conv.3.6, %conv.3.6
+  %add11.3.6 = add i32 %mul.3.6, %add11.2.6
+  %arrayidx.4.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 4
+  %52 = load i16, i16* %arrayidx.4.6, align 2
+  %conv.4.6 = zext i16 %52 to i32
+  %add.4.6 = add nuw nsw i32 %add.3.6, %conv.4.6
+  %mul.4.6 = mul nuw nsw i32 %conv.4.6, %conv.4.6
+  %add11.4.6 = add i32 %mul.4.6, %add11.3.6
+  %arrayidx.5.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 5
+  %53 = load i16, i16* %arrayidx.5.6, align 2
+  %conv.5.6 = zext i16 %53 to i32
+  %add.5.6 = add nuw nsw i32 %add.4.6, %conv.5.6
+  %mul.5.6 = mul nuw nsw i32 %conv.5.6, %conv.5.6
+  %add11.5.6 = add i32 %mul.5.6, %add11.4.6
+  %arrayidx.6.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 6
+  %54 = load i16, i16* %arrayidx.6.6, align 2
+  %conv.6.6 = zext i16 %54 to i32
+  %add.6.6 = add nuw nsw i32 %add.5.6, %conv.6.6
+  %mul.6.6 = mul nuw nsw i32 %conv.6.6, %conv.6.6
+  %add11.6.6 = add i32 %mul.6.6, %add11.5.6
+  %arrayidx.7.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 7
+  %55 = load i16, i16* %arrayidx.7.6, align 2
+  %conv.7.6 = zext i16 %55 to i32
+  %add.7.6 = add nuw nsw i32 %add.6.6, %conv.7.6
+  %mul.7.6 = mul nuw nsw i32 %conv.7.6, %conv.7.6
+  %add11.7.6 = add i32 %mul.7.6, %add11.6.6
+  %add.ptr.6 = getelementptr inbounds i16, i16* %add.ptr.5, i64 %idx.ext
+  %56 = load i16, i16* %add.ptr.6, align 2
+  %conv.764 = zext i16 %56 to i32
+  %add.765 = add nuw nsw i32 %add.7.6, %conv.764
+  %mul.766 = mul nuw nsw i32 %conv.764, %conv.764
+  %add11.767 = add i32 %mul.766, %add11.7.6
+  %arrayidx.1.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 1
+  %57 = load i16, i16* %arrayidx.1.7, align 2
+  %conv.1.7 = zext i16 %57 to i32
+  %add.1.7 = add nuw nsw i32 %add.765, %conv.1.7
+  %mul.1.7 = mul nuw nsw i32 %conv.1.7, %conv.1.7
+  %add11.1.7 = add i32 %mul.1.7, %add11.767
+  %arrayidx.2.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 2
+  %58 = load i16, i16* %arrayidx.2.7, align 2
+  %conv.2.7 = zext i16 %58 to i32
+  %add.2.7 = add nuw nsw i32 %add.1.7, %conv.2.7
+  %mul.2.7 = mul nuw nsw i32 %conv.2.7, %conv.2.7
+  %add11.2.7 = add i32 %mul.2.7, %add11.1.7
+  %arrayidx.3.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 3
+  %59 = load i16, i16* %arrayidx.3.7, align 2
+  %conv.3.7 = zext i16 %59 to i32
+  %add.3.7 = add nuw nsw i32 %add.2.7, %conv.3.7
+  %mul.3.7 = mul nuw nsw i32 %conv.3.7, %conv.3.7
+  %add11.3.7 = add i32 %mul.3.7, %add11.2.7
+  %arrayidx.4.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 4
+  %60 = load i16, i16* %arrayidx.4.7, align 2
+  %conv.4.7 = zext i16 %60 to i32
+  %add.4.7 = add nuw nsw i32 %add.3.7, %conv.4.7
+  %mul.4.7 = mul nuw nsw i32 %conv.4.7, %conv.4.7
+  %add11.4.7 = add i32 %mul.4.7, %add11.3.7
+  %arrayidx.5.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 5
+  %61 = load i16, i16* %arrayidx.5.7, align 2
+  %conv.5.7 = zext i16 %61 to i32
+  %add.5.7 = add nuw nsw i32 %add.4.7, %conv.5.7
+  %mul.5.7 = mul nuw nsw i32 %conv.5.7, %conv.5.7
+  %add11.5.7 = add i32 %mul.5.7, %add11.4.7
+  %arrayidx.6.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 6
+  %62 = load i16, i16* %arrayidx.6.7, align 2
+  %conv.6.7 = zext i16 %62 to i32
+  %add.6.7 = add nuw nsw i32 %add.5.7, %conv.6.7
+  %mul.6.7 = mul nuw nsw i32 %conv.6.7, %conv.6.7
+  %add11.6.7 = add i32 %mul.6.7, %add11.5.7
+  %arrayidx.7.7 = getelementptr inbounds i16, i16* %add.ptr.6, i64 7
+  %63 = load i16, i16* %arrayidx.7.7, align 2
+  %conv.7.7 = zext i16 %63 to i32
+  %add.7.7 = add nuw nsw i32 %add.6.7, %conv.7.7
+  %mul.7.7 = mul nuw nsw i32 %conv.7.7, %conv.7.7
+  %add11.7.7 = add i32 %mul.7.7, %add11.6.7
+  %conv15 = zext i32 %add.7.7 to i64
+  %conv16 = zext i32 %add11.7.7 to i64
+  %shl = shl nuw i64 %conv16, 32
+  %add17 = or i64 %shl, %conv15
+  ret i64 %add17
+}
+
+define i64 @looped(i16* nocapture noundef readonly %p, i32 noundef %st) {
+; CHECK-LABEL: @looped(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
+; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
+; CHECK:       for.cond1.preheader:
+; CHECK-NEXT:    [[Y_038:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC13:%.*]], [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[SQ_037:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD11_15:%.*]], [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[SM_036:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_15:%.*]], [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[P_ADDR_035:%.*]] = phi i16* [ [[P:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[P_ADDR_035]], align 2
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SM_036]], [[CONV]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
+; CHECK-NEXT:    [[ADD11:%.*]] = add i32 [[MUL]], [[SQ_037]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
+; CHECK-NEXT:    [[CONV_1:%.*]] = zext i16 [[TMP1]] to i32
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD]], [[CONV_1]]
+; CHECK-NEXT:    [[MUL_1:%.*]] = mul nuw nsw i32 [[CONV_1]], [[CONV_1]]
+; CHECK-NEXT:    [[ADD11_1:%.*]] = add i32 [[MUL_1]], [[ADD11]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
+; CHECK-NEXT:    [[CONV_2:%.*]] = zext i16 [[TMP2]] to i32
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[CONV_2]]
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul nuw nsw i32 [[CONV_2]], [[CONV_2]]
+; CHECK-NEXT:    [[ADD11_2:%.*]] = add i32 [[MUL_2]], [[ADD11_1]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 3
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
+; CHECK-NEXT:    [[CONV_3:%.*]] = zext i16 [[TMP3]] to i32
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD_2]], [[CONV_3]]
+; CHECK-NEXT:    [[MUL_3:%.*]] = mul nuw nsw i32 [[CONV_3]], [[CONV_3]]
+; CHECK-NEXT:    [[ADD11_3:%.*]] = add i32 [[MUL_3]], [[ADD11_2]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_4]], align 2
+; CHECK-NEXT:    [[CONV_4:%.*]] = zext i16 [[TMP4]] to i32
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[ADD_3]], [[CONV_4]]
+; CHECK-NEXT:    [[MUL_4:%.*]] = mul nuw nsw i32 [[CONV_4]], [[CONV_4]]
+; CHECK-NEXT:    [[ADD11_4:%.*]] = add i32 [[MUL_4]], [[ADD11_3]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 5
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX_5]], align 2
+; CHECK-NEXT:    [[CONV_5:%.*]] = zext i16 [[TMP5]] to i32
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 [[ADD_4]], [[CONV_5]]
+; CHECK-NEXT:    [[MUL_5:%.*]] = mul nuw nsw i32 [[CONV_5]], [[CONV_5]]
+; CHECK-NEXT:    [[ADD11_5:%.*]] = add i32 [[MUL_5]], [[ADD11_4]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 6
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_6]], align 2
+; CHECK-NEXT:    [[CONV_6:%.*]] = zext i16 [[TMP6]] to i32
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 [[ADD_5]], [[CONV_6]]
+; CHECK-NEXT:    [[MUL_6:%.*]] = mul nuw nsw i32 [[CONV_6]], [[CONV_6]]
+; CHECK-NEXT:    [[ADD11_6:%.*]] = add i32 [[MUL_6]], [[ADD11_5]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 7
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_7]], align 2
+; CHECK-NEXT:    [[CONV_7:%.*]] = zext i16 [[TMP7]] to i32
+; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 [[ADD_6]], [[CONV_7]]
+; CHECK-NEXT:    [[MUL_7:%.*]] = mul nuw nsw i32 [[CONV_7]], [[CONV_7]]
+; CHECK-NEXT:    [[ADD11_7:%.*]] = add i32 [[MUL_7]], [[ADD11_6]]
+; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_8]], align 2
+; CHECK-NEXT:    [[CONV_8:%.*]] = zext i16 [[TMP8]] to i32
+; CHECK-NEXT:    [[ADD_8:%.*]] = add i32 [[ADD_7]], [[CONV_8]]
+; CHECK-NEXT:    [[MUL_8:%.*]] = mul nuw nsw i32 [[CONV_8]], [[CONV_8]]
+; CHECK-NEXT:    [[ADD11_8:%.*]] = add i32 [[MUL_8]], [[ADD11_7]]
+; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 9
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_9]], align 2
+; CHECK-NEXT:    [[CONV_9:%.*]] = zext i16 [[TMP9]] to i32
+; CHECK-NEXT:    [[ADD_9:%.*]] = add i32 [[ADD_8]], [[CONV_9]]
+; CHECK-NEXT:    [[MUL_9:%.*]] = mul nuw nsw i32 [[CONV_9]], [[CONV_9]]
+; CHECK-NEXT:    [[ADD11_9:%.*]] = add i32 [[MUL_9]], [[ADD11_8]]
+; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 10
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_10]], align 2
+; CHECK-NEXT:    [[CONV_10:%.*]] = zext i16 [[TMP10]] to i32
+; CHECK-NEXT:    [[ADD_10:%.*]] = add i32 [[ADD_9]], [[CONV_10]]
+; CHECK-NEXT:    [[MUL_10:%.*]] = mul nuw nsw i32 [[CONV_10]], [[CONV_10]]
+; CHECK-NEXT:    [[ADD11_10:%.*]] = add i32 [[MUL_10]], [[ADD11_9]]
+; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 11
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_11]], align 2
+; CHECK-NEXT:    [[CONV_11:%.*]] = zext i16 [[TMP11]] to i32
+; CHECK-NEXT:    [[ADD_11:%.*]] = add i32 [[ADD_10]], [[CONV_11]]
+; CHECK-NEXT:    [[MUL_11:%.*]] = mul nuw nsw i32 [[CONV_11]], [[CONV_11]]
+; CHECK-NEXT:    [[ADD11_11:%.*]] = add i32 [[MUL_11]], [[ADD11_10]]
+; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 12
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_12]], align 2
+; CHECK-NEXT:    [[CONV_12:%.*]] = zext i16 [[TMP12]] to i32
+; CHECK-NEXT:    [[ADD_12:%.*]] = add i32 [[ADD_11]], [[CONV_12]]
+; CHECK-NEXT:    [[MUL_12:%.*]] = mul nuw nsw i32 [[CONV_12]], [[CONV_12]]
+; CHECK-NEXT:    [[ADD11_12:%.*]] = add i32 [[MUL_12]], [[ADD11_11]]
+; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 13
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_13]], align 2
+; CHECK-NEXT:    [[CONV_13:%.*]] = zext i16 [[TMP13]] to i32
+; CHECK-NEXT:    [[ADD_13:%.*]] = add i32 [[ADD_12]], [[CONV_13]]
+; CHECK-NEXT:    [[MUL_13:%.*]] = mul nuw nsw i32 [[CONV_13]], [[CONV_13]]
+; CHECK-NEXT:    [[ADD11_13:%.*]] = add i32 [[MUL_13]], [[ADD11_12]]
+; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 14
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX_14]], align 2
+; CHECK-NEXT:    [[CONV_14:%.*]] = zext i16 [[TMP14]] to i32
+; CHECK-NEXT:    [[ADD_14:%.*]] = add i32 [[ADD_13]], [[CONV_14]]
+; CHECK-NEXT:    [[MUL_14:%.*]] = mul nuw nsw i32 [[CONV_14]], [[CONV_14]]
+; CHECK-NEXT:    [[ADD11_14:%.*]] = add i32 [[MUL_14]], [[ADD11_13]]
+; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 15
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX_15]], align 2
+; CHECK-NEXT:    [[CONV_15:%.*]] = zext i16 [[TMP15]] to i32
+; CHECK-NEXT:    [[ADD_15]] = add i32 [[ADD_14]], [[CONV_15]]
+; CHECK-NEXT:    [[MUL_15:%.*]] = mul nuw nsw i32 [[CONV_15]], [[CONV_15]]
+; CHECK-NEXT:    [[ADD11_15]] = add i32 [[MUL_15]], [[ADD11_14]]
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[P_ADDR_035]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[INC13]] = add nuw nsw i32 [[Y_038]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC13]], 16
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[CONV15:%.*]] = zext i32 [[ADD_15]] to i64
+; CHECK-NEXT:    [[CONV16:%.*]] = zext i32 [[ADD11_15]] to i64
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i64 [[CONV16]], 32
+; CHECK-NEXT:    [[ADD17:%.*]] = or i64 [[SHL]], [[CONV15]]
+; CHECK-NEXT:    ret i64 [[ADD17]]
+;
+entry:
+  %idx.ext = sext i32 %st to i64
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.cond1.preheader
+  %y.038 = phi i32 [ 0, %entry ], [ %inc13, %for.cond1.preheader ]
+  %sq.037 = phi i32 [ 0, %entry ], [ %add11.15, %for.cond1.preheader ]
+  %sm.036 = phi i32 [ 0, %entry ], [ %add.15, %for.cond1.preheader ]
+  %p.addr.035 = phi i16* [ %p, %entry ], [ %add.ptr, %for.cond1.preheader ]
+  %0 = load i16, i16* %p.addr.035, align 2
+  %conv = zext i16 %0 to i32
+  %add = add i32 %sm.036, %conv
+  %mul = mul nuw nsw i32 %conv, %conv
+  %add11 = add i32 %mul, %sq.037
+  %arrayidx.1 = getelementptr inbounds i16, i16* %p.addr.035, i64 1
+  %1 = load i16, i16* %arrayidx.1, align 2
+  %conv.1 = zext i16 %1 to i32
+  %add.1 = add i32 %add, %conv.1
+  %mul.1 = mul nuw nsw i32 %conv.1, %conv.1
+  %add11.1 = add i32 %mul.1, %add11
+  %arrayidx.2 = getelementptr inbounds i16, i16* %p.addr.035, i64 2
+  %2 = load i16, i16* %arrayidx.2, align 2
+  %conv.2 = zext i16 %2 to i32
+  %add.2 = add i32 %add.1, %conv.2
+  %mul.2 = mul nuw nsw i32 %conv.2, %conv.2
+  %add11.2 = add i32 %mul.2, %add11.1
+  %arrayidx.3 = getelementptr inbounds i16, i16* %p.addr.035, i64 3
+  %3 = load i16, i16* %arrayidx.3, align 2
+  %conv.3 = zext i16 %3 to i32
+  %add.3 = add i32 %add.2, %conv.3
+  %mul.3 = mul nuw nsw i32 %conv.3, %conv.3
+  %add11.3 = add i32 %mul.3, %add11.2
+  %arrayidx.4 = getelementptr inbounds i16, i16* %p.addr.035, i64 4
+  %4 = load i16, i16* %arrayidx.4, align 2
+  %conv.4 = zext i16 %4 to i32
+  %add.4 = add i32 %add.3, %conv.4
+  %mul.4 = mul nuw nsw i32 %conv.4, %conv.4
+  %add11.4 = add i32 %mul.4, %add11.3
+  %arrayidx.5 = getelementptr inbounds i16, i16* %p.addr.035, i64 5
+  %5 = load i16, i16* %arrayidx.5, align 2
+  %conv.5 = zext i16 %5 to i32
+  %add.5 = add i32 %add.4, %conv.5
+  %mul.5 = mul nuw nsw i32 %conv.5, %conv.5
+  %add11.5 = add i32 %mul.5, %add11.4
+  %arrayidx.6 = getelementptr inbounds i16, i16* %p.addr.035, i64 6
+  %6 = load i16, i16* %arrayidx.6, align 2
+  %conv.6 = zext i16 %6 to i32
+  %add.6 = add i32 %add.5, %conv.6
+  %mul.6 = mul nuw nsw i32 %conv.6, %conv.6
+  %add11.6 = add i32 %mul.6, %add11.5
+  %arrayidx.7 = getelementptr inbounds i16, i16* %p.addr.035, i64 7
+  %7 = load i16, i16* %arrayidx.7, align 2
+  %conv.7 = zext i16 %7 to i32
+  %add.7 = add i32 %add.6, %conv.7
+  %mul.7 = mul nuw nsw i32 %conv.7, %conv.7
+  %add11.7 = add i32 %mul.7, %add11.6
+  %arrayidx.8 = getelementptr inbounds i16, i16* %p.addr.035, i64 8
+  %8 = load i16, i16* %arrayidx.8, align 2
+  %conv.8 = zext i16 %8 to i32
+  %add.8 = add i32 %add.7, %conv.8
+  %mul.8 = mul nuw nsw i32 %conv.8, %conv.8
+  %add11.8 = add i32 %mul.8, %add11.7
+  %arrayidx.9 = getelementptr inbounds i16, i16* %p.addr.035, i64 9
+  %9 = load i16, i16* %arrayidx.9, align 2
+  %conv.9 = zext i16 %9 to i32
+  %add.9 = add i32 %add.8, %conv.9
+  %mul.9 = mul nuw nsw i32 %conv.9, %conv.9
+  %add11.9 = add i32 %mul.9, %add11.8
+  %arrayidx.10 = getelementptr inbounds i16, i16* %p.addr.035, i64 10
+  %10 = load i16, i16* %arrayidx.10, align 2
+  %conv.10 = zext i16 %10 to i32
+  %add.10 = add i32 %add.9, %conv.10
+  %mul.10 = mul nuw nsw i32 %conv.10, %conv.10
+  %add11.10 = add i32 %mul.10, %add11.9
+  %arrayidx.11 = getelementptr inbounds i16, i16* %p.addr.035, i64 11
+  %11 = load i16, i16* %arrayidx.11, align 2
+  %conv.11 = zext i16 %11 to i32
+  %add.11 = add i32 %add.10, %conv.11
+  %mul.11 = mul nuw nsw i32 %conv.11, %conv.11
+  %add11.11 = add i32 %mul.11, %add11.10
+  %arrayidx.12 = getelementptr inbounds i16, i16* %p.addr.035, i64 12
+  %12 = load i16, i16* %arrayidx.12, align 2
+  %conv.12 = zext i16 %12 to i32
+  %add.12 = add i32 %add.11, %conv.12
+  %mul.12 = mul nuw nsw i32 %conv.12, %conv.12
+  %add11.12 = add i32 %mul.12, %add11.11
+  %arrayidx.13 = getelementptr inbounds i16, i16* %p.addr.035, i64 13
+  %13 = load i16, i16* %arrayidx.13, align 2
+  %conv.13 = zext i16 %13 to i32
+  %add.13 = add i32 %add.12, %conv.13
+  %mul.13 = mul nuw nsw i32 %conv.13, %conv.13
+  %add11.13 = add i32 %mul.13, %add11.12
+  %arrayidx.14 = getelementptr inbounds i16, i16* %p.addr.035, i64 14
+  %14 = load i16, i16* %arrayidx.14, align 2
+  %conv.14 = zext i16 %14 to i32
+  %add.14 = add i32 %add.13, %conv.14
+  %mul.14 = mul nuw nsw i32 %conv.14, %conv.14
+  %add11.14 = add i32 %mul.14, %add11.13
+  %arrayidx.15 = getelementptr inbounds i16, i16* %p.addr.035, i64 15
+  %15 = load i16, i16* %arrayidx.15, align 2
+  %conv.15 = zext i16 %15 to i32
+  %add.15 = add i32 %add.14, %conv.15
+  %mul.15 = mul nuw nsw i32 %conv.15, %conv.15
+  %add11.15 = add i32 %mul.15, %add11.14
+  %add.ptr = getelementptr inbounds i16, i16* %p.addr.035, i64 %idx.ext
+  %inc13 = add nuw nsw i32 %y.038, 1
+  %exitcond.not = icmp eq i32 %inc13, 16
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.cond.cleanup:                                 ; preds = %for.cond1.preheader
+  %conv15 = zext i32 %add.15 to i64
+  %conv16 = zext i32 %add11.15 to i64
+  %shl = shl nuw i64 %conv16, 32
+  %add17 = or i64 %shl, %conv15
+  ret i64 %add17
+}
+