[llvm] d0fb687 - [NFC][ARM] Add two tests

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 19 08:18:57 PDT 2020


Author: Sam Parker
Date: 2020-03-19T15:18:33Z
New Revision: d0fb6879c37d571a064535b379b20cc03287d5fd

URL: https://github.com/llvm/llvm-project/commit/d0fb6879c37d571a064535b379b20cc03287d5fd
DIFF: https://github.com/llvm/llvm-project/commit/d0fb6879c37d571a064535b379b20cc03287d5fd.diff

LOG: [NFC][ARM] Add two tests

Add tests for IndVarSimplify on Armv8-M (thumbv8m) targets.

Added: 
    llvm/test/CodeGen/ARM/indvar-cost.ll
    llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/ARM/indvar-cost.ll b/llvm/test/CodeGen/ARM/indvar-cost.ll
new file mode 100644
index 000000000000..14d873d9e3ac
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/indvar-cost.ll
@@ -0,0 +1,508 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -indvars -mtriple=thumbv8m.base -S %s -o - | FileCheck %s --check-prefix=CHECK-T1
+; RUN: opt -indvars -mtriple=thumbv8m.main -S %s -o - | FileCheck %s --check-prefix=CHECK-T2
+
+define dso_local arm_aapcscc void @arm_conv_fast_q15(i16* %pSrcA, i32 %srcALen, i16* %pSrcB, i32 %srcBLen, i16* %pDst, i16** %store.px, i16** %store.py, i32* %store.res) local_unnamed_addr {
+; CHECK-T1-LABEL: @arm_conv_fast_q15(
+; CHECK-T1-NEXT:  entry:
+; CHECK-T1-NEXT:    [[CMP:%.*]] = icmp ult i32 [[SRCALEN:%.*]], [[SRCBLEN:%.*]]
+; CHECK-T1-NEXT:    [[SRCALEN_SRCBLEN:%.*]] = select i1 [[CMP]], i32 [[SRCALEN]], i32 [[SRCBLEN]]
+; CHECK-T1-NEXT:    [[PSRCB_PSRCA:%.*]] = select i1 [[CMP]], i16* [[PSRCB:%.*]], i16* [[PSRCA:%.*]]
+; CHECK-T1-NEXT:    [[PSRCA_PSRCB:%.*]] = select i1 [[CMP]], i16* [[PSRCA]], i16* [[PSRCB]]
+; CHECK-T1-NEXT:    [[SUB:%.*]] = add i32 [[SRCALEN_SRCBLEN]], -1
+; CHECK-T1-NEXT:    [[CMP41080:%.*]] = icmp eq i32 [[SUB]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP41080]], label [[WHILE_END13:%.*]], label [[WHILE_COND5_PREHEADER_PREHEADER:%.*]]
+; CHECK-T1:       while.cond5.preheader.preheader:
+; CHECK-T1-NEXT:    br label [[WHILE_COND5_PREHEADER:%.*]]
+; CHECK-T1:       while.cond5.preheader:
+; CHECK-T1-NEXT:    [[COUNT_01084:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_END:%.*]] ], [ 1, [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[BLOCKSIZE1_01083:%.*]] = phi i32 [ [[DEC12:%.*]], [[WHILE_END]] ], [ [[SUB]], [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PY_01082:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[WHILE_END]] ], [ [[PSRCA_PSRCB]], [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[POUT_01081:%.*]] = phi i16* [ [[INCDEC_PTR11:%.*]], [[WHILE_END]] ], [ [[PDST:%.*]], [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T1-NEXT:    br label [[WHILE_BODY7:%.*]]
+; CHECK-T1:       while.body7:
+; CHECK-T1-NEXT:    [[K_01078:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY7]] ], [ [[COUNT_01084]], [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[SUM_01077:%.*]] = phi i32 [ [[ADD6_I:%.*]], [[WHILE_BODY7]] ], [ 0, [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PY_11076:%.*]] = phi i16* [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY7]] ], [ [[PY_01082]], [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PX_11075:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY7]] ], [ [[PSRCB_PSRCA]], [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PX_11075]], i32 1
+; CHECK-T1-NEXT:    [[TMP0:%.*]] = load i16, i16* [[PX_11075]], align 2
+; CHECK-T1-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-T1-NEXT:    [[INCDEC_PTR8]] = getelementptr inbounds i16, i16* [[PY_11076]], i32 -1
+; CHECK-T1-NEXT:    [[TMP1:%.*]] = load i16, i16* [[PY_11076]], align 2
+; CHECK-T1-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP1]] to i32
+; CHECK-T1-NEXT:    [[MUL_I:%.*]] = mul nsw i32 [[CONV9]], [[CONV]]
+; CHECK-T1-NEXT:    [[SHR3_I:%.*]] = ashr i32 [[CONV]], 16
+; CHECK-T1-NEXT:    [[SHR4_I:%.*]] = ashr i32 [[CONV9]], 16
+; CHECK-T1-NEXT:    [[MUL5_I:%.*]] = mul nsw i32 [[SHR4_I]], [[SHR3_I]]
+; CHECK-T1-NEXT:    [[ADD_I:%.*]] = add i32 [[MUL_I]], [[SUM_01077]]
+; CHECK-T1-NEXT:    [[ADD6_I]] = add i32 [[ADD_I]], [[MUL5_I]]
+; CHECK-T1-NEXT:    [[DEC]] = add nsw i32 [[K_01078]], -1
+; CHECK-T1-NEXT:    [[CMP6:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP6]], label [[WHILE_END]], label [[WHILE_BODY7]]
+; CHECK-T1:       while.end:
+; CHECK-T1-NEXT:    [[ADD6_I_LCSSA:%.*]] = phi i32 [ [[ADD6_I]], [[WHILE_BODY7]] ]
+; CHECK-T1-NEXT:    [[TMP2:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15
+; CHECK-T1-NEXT:    [[CONV10:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-T1-NEXT:    [[INCDEC_PTR11]] = getelementptr inbounds i16, i16* [[POUT_01081]], i32 1
+; CHECK-T1-NEXT:    store i16 [[CONV10]], i16* [[POUT_01081]], align 2
+; CHECK-T1-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[COUNT_01084]]
+; CHECK-T1-NEXT:    [[INC]] = add nuw nsw i32 [[COUNT_01084]], 1
+; CHECK-T1-NEXT:    [[DEC12]] = add i32 [[BLOCKSIZE1_01083]], -1
+; CHECK-T1-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[COUNT_01084]], 3
+; CHECK-T1-NEXT:    [[CMP4:%.*]] = icmp ne i32 [[DEC12]], 0
+; CHECK-T1-NEXT:    [[TMP3:%.*]] = and i1 [[CMP4]], [[CMP3]]
+; CHECK-T1-NEXT:    br i1 [[TMP3]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]]
+; CHECK-T1:       while.end13.loopexit:
+; CHECK-T1-NEXT:    [[INCDEC_PTR11_LCSSA:%.*]] = phi i16* [ [[INCDEC_PTR11]], [[WHILE_END]] ]
+; CHECK-T1-NEXT:    [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[WHILE_END]] ]
+; CHECK-T1-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_END]] ]
+; CHECK-T1-NEXT:    [[DEC12_LCSSA:%.*]] = phi i32 [ [[DEC12]], [[WHILE_END]] ]
+; CHECK-T1-NEXT:    br label [[WHILE_END13]]
+; CHECK-T1:       while.end13:
+; CHECK-T1-NEXT:    [[POUT_0_LCSSA:%.*]] = phi i16* [ [[PDST]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR11_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[PY_0_LCSSA:%.*]] = phi i16* [ [[PSRCA_PSRCB]], [[ENTRY]] ], [ [[ADD_PTR_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[DEC12_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[CMP161068:%.*]] = icmp eq i32 [[BLOCKSIZE1_0_LCSSA]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP161068]], label [[EXIT:%.*]], label [[WHILE_BODY18_PREHEADER:%.*]]
+; CHECK-T1:       while.body18.preheader:
+; CHECK-T1-NEXT:    [[ADD_PTR14:%.*]] = getelementptr inbounds i16, i16* [[PY_0_LCSSA]], i32 -1
+; CHECK-T1-NEXT:    br label [[WHILE_BODY18:%.*]]
+; CHECK-T1:       while.body18:
+; CHECK-T1-NEXT:    [[COUNT_11072:%.*]] = phi i32 [ [[INC49:%.*]], [[WHILE_END43:%.*]] ], [ [[COUNT_0_LCSSA]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[BLOCKSIZE1_11071:%.*]] = phi i32 [ [[DEC50:%.*]], [[WHILE_END43]] ], [ [[BLOCKSIZE1_0_LCSSA]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PY_21070:%.*]] = phi i16* [ [[ADD_PTR48:%.*]], [[WHILE_END43]] ], [ [[ADD_PTR14]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[POUT_11069:%.*]] = phi i16* [ [[INCDEC_PTR46:%.*]], [[WHILE_END43]] ], [ [[POUT_0_LCSSA]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[SHR19:%.*]] = lshr i32 [[COUNT_11072]], 2
+; CHECK-T1-NEXT:    [[CMP211054:%.*]] = icmp eq i32 [[SHR19]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP211054]], label [[WHILE_END31:%.*]], label [[WHILE_BODY23_PREHEADER:%.*]]
+; CHECK-T1:       while.body23.preheader:
+; CHECK-T1-NEXT:    br label [[WHILE_BODY23:%.*]]
+; CHECK-T1:       while.body23:
+; CHECK-T1-NEXT:    [[K_11058:%.*]] = phi i32 [ [[DEC30:%.*]], [[WHILE_BODY23]] ], [ [[SHR19]], [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[SUM_11057:%.*]] = phi i32 [ [[ADD6_I878:%.*]], [[WHILE_BODY23]] ], [ 0, [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PY_31056:%.*]] = phi i16* [ [[ADD_PTR_I884:%.*]], [[WHILE_BODY23]] ], [ [[PY_21070]], [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PX_31055:%.*]] = phi i16* [ [[ADD_PTR_I890:%.*]], [[WHILE_BODY23]] ], [ [[PSRCB_PSRCA]], [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[ARRAYIDX_I907:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 1
+; CHECK-T1-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2
+; CHECK-T1-NEXT:    [[TMP5:%.*]] = load i16, i16* [[PX_31055]], align 2
+; CHECK-T1-NEXT:    [[ADD_PTR_I912:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 2
+; CHECK-T1-NEXT:    [[ARRAYIDX_I901:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 1
+; CHECK-T1-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2
+; CHECK-T1-NEXT:    [[TMP7:%.*]] = load i16, i16* [[PY_31056]], align 2
+; CHECK-T1-NEXT:    [[ADD_PTR_I906:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -2
+; CHECK-T1-NEXT:    [[SHR_I892:%.*]] = sext i16 [[TMP5]] to i32
+; CHECK-T1-NEXT:    [[SHR1_I893:%.*]] = sext i16 [[TMP6]] to i32
+; CHECK-T1-NEXT:    [[MUL_I894:%.*]] = mul nsw i32 [[SHR1_I893]], [[SHR_I892]]
+; CHECK-T1-NEXT:    [[SHR2_I895:%.*]] = sext i16 [[TMP4]] to i32
+; CHECK-T1-NEXT:    [[SHR4_I897:%.*]] = sext i16 [[TMP7]] to i32
+; CHECK-T1-NEXT:    [[MUL5_I898:%.*]] = mul nsw i32 [[SHR4_I897]], [[SHR2_I895]]
+; CHECK-T1-NEXT:    [[ADD_I899:%.*]] = add i32 [[MUL_I894]], [[SUM_11057]]
+; CHECK-T1-NEXT:    [[ADD6_I900:%.*]] = add i32 [[ADD_I899]], [[MUL5_I898]]
+; CHECK-T1-NEXT:    [[ARRAYIDX_I885:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 3
+; CHECK-T1-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2
+; CHECK-T1-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2
+; CHECK-T1-NEXT:    [[ADD_PTR_I890]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 4
+; CHECK-T1-NEXT:    [[ARRAYIDX_I879:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -1
+; CHECK-T1-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2
+; CHECK-T1-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2
+; CHECK-T1-NEXT:    [[ADD_PTR_I884]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -4
+; CHECK-T1-NEXT:    [[SHR_I870:%.*]] = sext i16 [[TMP9]] to i32
+; CHECK-T1-NEXT:    [[SHR1_I871:%.*]] = sext i16 [[TMP10]] to i32
+; CHECK-T1-NEXT:    [[MUL_I872:%.*]] = mul nsw i32 [[SHR1_I871]], [[SHR_I870]]
+; CHECK-T1-NEXT:    [[SHR2_I873:%.*]] = sext i16 [[TMP8]] to i32
+; CHECK-T1-NEXT:    [[SHR4_I875:%.*]] = sext i16 [[TMP11]] to i32
+; CHECK-T1-NEXT:    [[MUL5_I876:%.*]] = mul nsw i32 [[SHR4_I875]], [[SHR2_I873]]
+; CHECK-T1-NEXT:    [[ADD_I877:%.*]] = add i32 [[ADD6_I900]], [[MUL_I872]]
+; CHECK-T1-NEXT:    [[ADD6_I878]] = add i32 [[ADD_I877]], [[MUL5_I876]]
+; CHECK-T1-NEXT:    [[DEC30]] = add nsw i32 [[K_11058]], -1
+; CHECK-T1-NEXT:    [[CMP21:%.*]] = icmp eq i32 [[DEC30]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP21]], label [[WHILE_END31_LOOPEXIT:%.*]], label [[WHILE_BODY23]]
+; CHECK-T1:       while.end31.loopexit:
+; CHECK-T1-NEXT:    [[ADD_PTR_I890_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_I890]], [[WHILE_BODY23]] ]
+; CHECK-T1-NEXT:    [[ADD_PTR_I884_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_I884]], [[WHILE_BODY23]] ]
+; CHECK-T1-NEXT:    [[ADD6_I878_LCSSA:%.*]] = phi i32 [ [[ADD6_I878]], [[WHILE_BODY23]] ]
+; CHECK-T1-NEXT:    br label [[WHILE_END31]]
+; CHECK-T1:       while.end31:
+; CHECK-T1-NEXT:    [[PX_3_LCSSA:%.*]] = phi i16* [ [[PSRCB_PSRCA]], [[WHILE_BODY18]] ], [ [[ADD_PTR_I890_LCSSA]], [[WHILE_END31_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[PY_3_LCSSA:%.*]] = phi i16* [ [[PY_21070]], [[WHILE_BODY18]] ], [ [[ADD_PTR_I884_LCSSA]], [[WHILE_END31_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[SUM_1_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_BODY18]] ], [ [[ADD6_I878_LCSSA]], [[WHILE_END31_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[REM:%.*]] = and i32 [[COUNT_11072]], 3
+; CHECK-T1-NEXT:    [[CMP341062:%.*]] = icmp eq i32 [[REM]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP341062]], label [[WHILE_END43]], label [[WHILE_BODY36_PREHEADER:%.*]]
+; CHECK-T1:       while.body36.preheader:
+; CHECK-T1-NEXT:    [[ADD_PTR32:%.*]] = getelementptr inbounds i16, i16* [[PY_3_LCSSA]], i32 1
+; CHECK-T1-NEXT:    br label [[WHILE_BODY36:%.*]]
+; CHECK-T1:       while.body36:
+; CHECK-T1-NEXT:    [[K_21066:%.*]] = phi i32 [ [[DEC42:%.*]], [[WHILE_BODY36]] ], [ [[REM]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[SUM_21065:%.*]] = phi i32 [ [[ADD6_I868:%.*]], [[WHILE_BODY36]] ], [ [[SUM_1_LCSSA]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PY_41064:%.*]] = phi i16* [ [[INCDEC_PTR39:%.*]], [[WHILE_BODY36]] ], [ [[ADD_PTR32]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[PX_41063:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[WHILE_BODY36]] ], [ [[PX_3_LCSSA]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T1-NEXT:    [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PX_41063]], i32 1
+; CHECK-T1-NEXT:    [[TMP12:%.*]] = load i16, i16* [[PX_41063]], align 2
+; CHECK-T1-NEXT:    [[CONV38:%.*]] = sext i16 [[TMP12]] to i32
+; CHECK-T1-NEXT:    [[INCDEC_PTR39]] = getelementptr inbounds i16, i16* [[PY_41064]], i32 -1
+; CHECK-T1-NEXT:    [[TMP13:%.*]] = load i16, i16* [[PY_41064]], align 2
+; CHECK-T1-NEXT:    [[CONV40:%.*]] = sext i16 [[TMP13]] to i32
+; CHECK-T1-NEXT:    [[MUL_I863:%.*]] = mul nsw i32 [[CONV40]], [[CONV38]]
+; CHECK-T1-NEXT:    [[SHR3_I864:%.*]] = ashr i32 [[CONV38]], 16
+; CHECK-T1-NEXT:    [[SHR4_I865:%.*]] = ashr i32 [[CONV40]], 16
+; CHECK-T1-NEXT:    [[MUL5_I866:%.*]] = mul nsw i32 [[SHR4_I865]], [[SHR3_I864]]
+; CHECK-T1-NEXT:    [[ADD_I867:%.*]] = add i32 [[MUL_I863]], [[SUM_21065]]
+; CHECK-T1-NEXT:    [[ADD6_I868]] = add i32 [[ADD_I867]], [[MUL5_I866]]
+; CHECK-T1-NEXT:    [[DEC42]] = add nsw i32 [[K_21066]], -1
+; CHECK-T1-NEXT:    [[CMP34:%.*]] = icmp eq i32 [[DEC42]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP34]], label [[WHILE_END43_LOOPEXIT:%.*]], label [[WHILE_BODY36]]
+; CHECK-T1:       while.end43.loopexit:
+; CHECK-T1-NEXT:    [[ADD6_I868_LCSSA:%.*]] = phi i32 [ [[ADD6_I868]], [[WHILE_BODY36]] ]
+; CHECK-T1-NEXT:    br label [[WHILE_END43]]
+; CHECK-T1:       while.end43:
+; CHECK-T1-NEXT:    [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_1_LCSSA]], [[WHILE_END31]] ], [ [[ADD6_I868_LCSSA]], [[WHILE_END43_LOOPEXIT]] ]
+; CHECK-T1-NEXT:    [[TMP14:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15
+; CHECK-T1-NEXT:    [[CONV45:%.*]] = trunc i32 [[TMP14]] to i16
+; CHECK-T1-NEXT:    [[INCDEC_PTR46]] = getelementptr inbounds i16, i16* [[POUT_11069]], i32 1
+; CHECK-T1-NEXT:    store i16 [[CONV45]], i16* [[POUT_11069]], align 2
+; CHECK-T1-NEXT:    [[SUB47:%.*]] = add i32 [[COUNT_11072]], -1
+; CHECK-T1-NEXT:    [[ADD_PTR48]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[SUB47]]
+; CHECK-T1-NEXT:    [[INC49]] = add i32 [[COUNT_11072]], 1
+; CHECK-T1-NEXT:    [[DEC50]] = add i32 [[BLOCKSIZE1_11071]], -1
+; CHECK-T1-NEXT:    [[CMP16:%.*]] = icmp eq i32 [[DEC50]], 0
+; CHECK-T1-NEXT:    br i1 [[CMP16]], label [[EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY18]]
+; CHECK-T1:       exit.loopexit:
+; CHECK-T1-NEXT:    br label [[EXIT]]
+; CHECK-T1:       exit:
+; CHECK-T1-NEXT:    ret void
+;
+; CHECK-T2-LABEL: @arm_conv_fast_q15(
+; CHECK-T2-NEXT:  entry:
+; CHECK-T2-NEXT:    [[CMP:%.*]] = icmp ult i32 [[SRCALEN:%.*]], [[SRCBLEN:%.*]]
+; CHECK-T2-NEXT:    [[SRCALEN_SRCBLEN:%.*]] = select i1 [[CMP]], i32 [[SRCALEN]], i32 [[SRCBLEN]]
+; CHECK-T2-NEXT:    [[PSRCB_PSRCA:%.*]] = select i1 [[CMP]], i16* [[PSRCB:%.*]], i16* [[PSRCA:%.*]]
+; CHECK-T2-NEXT:    [[PSRCA_PSRCB:%.*]] = select i1 [[CMP]], i16* [[PSRCA]], i16* [[PSRCB]]
+; CHECK-T2-NEXT:    [[SUB:%.*]] = add i32 [[SRCALEN_SRCBLEN]], -1
+; CHECK-T2-NEXT:    [[CMP41080:%.*]] = icmp eq i32 [[SUB]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP41080]], label [[WHILE_END13:%.*]], label [[WHILE_COND5_PREHEADER_PREHEADER:%.*]]
+; CHECK-T2:       while.cond5.preheader.preheader:
+; CHECK-T2-NEXT:    br label [[WHILE_COND5_PREHEADER:%.*]]
+; CHECK-T2:       while.cond5.preheader:
+; CHECK-T2-NEXT:    [[COUNT_01084:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_END:%.*]] ], [ 1, [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[BLOCKSIZE1_01083:%.*]] = phi i32 [ [[DEC12:%.*]], [[WHILE_END]] ], [ [[SUB]], [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PY_01082:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[WHILE_END]] ], [ [[PSRCA_PSRCB]], [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[POUT_01081:%.*]] = phi i16* [ [[INCDEC_PTR11:%.*]], [[WHILE_END]] ], [ [[PDST:%.*]], [[WHILE_COND5_PREHEADER_PREHEADER]] ]
+; CHECK-T2-NEXT:    br label [[WHILE_BODY7:%.*]]
+; CHECK-T2:       while.body7:
+; CHECK-T2-NEXT:    [[K_01078:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY7]] ], [ [[COUNT_01084]], [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[SUM_01077:%.*]] = phi i32 [ [[ADD6_I:%.*]], [[WHILE_BODY7]] ], [ 0, [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PY_11076:%.*]] = phi i16* [ [[INCDEC_PTR8:%.*]], [[WHILE_BODY7]] ], [ [[PY_01082]], [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PX_11075:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY7]] ], [ [[PSRCB_PSRCA]], [[WHILE_COND5_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PX_11075]], i32 1
+; CHECK-T2-NEXT:    [[TMP0:%.*]] = load i16, i16* [[PX_11075]], align 2
+; CHECK-T2-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-T2-NEXT:    [[INCDEC_PTR8]] = getelementptr inbounds i16, i16* [[PY_11076]], i32 -1
+; CHECK-T2-NEXT:    [[TMP1:%.*]] = load i16, i16* [[PY_11076]], align 2
+; CHECK-T2-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP1]] to i32
+; CHECK-T2-NEXT:    [[MUL_I:%.*]] = mul nsw i32 [[CONV9]], [[CONV]]
+; CHECK-T2-NEXT:    [[SHR3_I:%.*]] = ashr i32 [[CONV]], 16
+; CHECK-T2-NEXT:    [[SHR4_I:%.*]] = ashr i32 [[CONV9]], 16
+; CHECK-T2-NEXT:    [[MUL5_I:%.*]] = mul nsw i32 [[SHR4_I]], [[SHR3_I]]
+; CHECK-T2-NEXT:    [[ADD_I:%.*]] = add i32 [[MUL_I]], [[SUM_01077]]
+; CHECK-T2-NEXT:    [[ADD6_I]] = add i32 [[ADD_I]], [[MUL5_I]]
+; CHECK-T2-NEXT:    [[DEC]] = add nsw i32 [[K_01078]], -1
+; CHECK-T2-NEXT:    [[CMP6:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP6]], label [[WHILE_END]], label [[WHILE_BODY7]]
+; CHECK-T2:       while.end:
+; CHECK-T2-NEXT:    [[ADD6_I_LCSSA:%.*]] = phi i32 [ [[ADD6_I]], [[WHILE_BODY7]] ]
+; CHECK-T2-NEXT:    [[TMP2:%.*]] = lshr i32 [[ADD6_I_LCSSA]], 15
+; CHECK-T2-NEXT:    [[CONV10:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-T2-NEXT:    [[INCDEC_PTR11]] = getelementptr inbounds i16, i16* [[POUT_01081]], i32 1
+; CHECK-T2-NEXT:    store i16 [[CONV10]], i16* [[POUT_01081]], align 2
+; CHECK-T2-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[COUNT_01084]]
+; CHECK-T2-NEXT:    [[INC]] = add nuw nsw i32 [[COUNT_01084]], 1
+; CHECK-T2-NEXT:    [[DEC12]] = add i32 [[BLOCKSIZE1_01083]], -1
+; CHECK-T2-NEXT:    [[CMP3:%.*]] = icmp ult i32 [[COUNT_01084]], 3
+; CHECK-T2-NEXT:    [[CMP4:%.*]] = icmp ne i32 [[DEC12]], 0
+; CHECK-T2-NEXT:    [[TMP3:%.*]] = and i1 [[CMP4]], [[CMP3]]
+; CHECK-T2-NEXT:    br i1 [[TMP3]], label [[WHILE_COND5_PREHEADER]], label [[WHILE_END13_LOOPEXIT:%.*]]
+; CHECK-T2:       while.end13.loopexit:
+; CHECK-T2-NEXT:    [[INCDEC_PTR11_LCSSA:%.*]] = phi i16* [ [[INCDEC_PTR11]], [[WHILE_END]] ]
+; CHECK-T2-NEXT:    [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR]], [[WHILE_END]] ]
+; CHECK-T2-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_END]] ]
+; CHECK-T2-NEXT:    [[DEC12_LCSSA:%.*]] = phi i32 [ [[DEC12]], [[WHILE_END]] ]
+; CHECK-T2-NEXT:    br label [[WHILE_END13]]
+; CHECK-T2:       while.end13:
+; CHECK-T2-NEXT:    [[POUT_0_LCSSA:%.*]] = phi i16* [ [[PDST]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR11_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[PY_0_LCSSA:%.*]] = phi i16* [ [[PSRCA_PSRCB]], [[ENTRY]] ], [ [[ADD_PTR_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[BLOCKSIZE1_0_LCSSA:%.*]] = phi i32 [ [[SUB]], [[ENTRY]] ], [ [[DEC12_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[COUNT_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC_LCSSA]], [[WHILE_END13_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[CMP161068:%.*]] = icmp eq i32 [[BLOCKSIZE1_0_LCSSA]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP161068]], label [[EXIT:%.*]], label [[WHILE_BODY18_PREHEADER:%.*]]
+; CHECK-T2:       while.body18.preheader:
+; CHECK-T2-NEXT:    [[ADD_PTR14:%.*]] = getelementptr inbounds i16, i16* [[PY_0_LCSSA]], i32 -1
+; CHECK-T2-NEXT:    br label [[WHILE_BODY18:%.*]]
+; CHECK-T2:       while.body18:
+; CHECK-T2-NEXT:    [[COUNT_11072:%.*]] = phi i32 [ [[INC49:%.*]], [[WHILE_END43:%.*]] ], [ [[COUNT_0_LCSSA]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[BLOCKSIZE1_11071:%.*]] = phi i32 [ [[DEC50:%.*]], [[WHILE_END43]] ], [ [[BLOCKSIZE1_0_LCSSA]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PY_21070:%.*]] = phi i16* [ [[ADD_PTR48:%.*]], [[WHILE_END43]] ], [ [[ADD_PTR14]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[POUT_11069:%.*]] = phi i16* [ [[INCDEC_PTR46:%.*]], [[WHILE_END43]] ], [ [[POUT_0_LCSSA]], [[WHILE_BODY18_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[SHR19:%.*]] = lshr i32 [[COUNT_11072]], 2
+; CHECK-T2-NEXT:    [[CMP211054:%.*]] = icmp eq i32 [[SHR19]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP211054]], label [[WHILE_END31:%.*]], label [[WHILE_BODY23_PREHEADER:%.*]]
+; CHECK-T2:       while.body23.preheader:
+; CHECK-T2-NEXT:    br label [[WHILE_BODY23:%.*]]
+; CHECK-T2:       while.body23:
+; CHECK-T2-NEXT:    [[K_11058:%.*]] = phi i32 [ [[DEC30:%.*]], [[WHILE_BODY23]] ], [ [[SHR19]], [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[SUM_11057:%.*]] = phi i32 [ [[ADD6_I878:%.*]], [[WHILE_BODY23]] ], [ 0, [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PY_31056:%.*]] = phi i16* [ [[ADD_PTR_I884:%.*]], [[WHILE_BODY23]] ], [ [[PY_21070]], [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PX_31055:%.*]] = phi i16* [ [[ADD_PTR_I890:%.*]], [[WHILE_BODY23]] ], [ [[PSRCB_PSRCA]], [[WHILE_BODY23_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[ARRAYIDX_I907:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 1
+; CHECK-T2-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_I907]], align 2
+; CHECK-T2-NEXT:    [[TMP5:%.*]] = load i16, i16* [[PX_31055]], align 2
+; CHECK-T2-NEXT:    [[ADD_PTR_I912:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 2
+; CHECK-T2-NEXT:    [[ARRAYIDX_I901:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 1
+; CHECK-T2-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_I901]], align 2
+; CHECK-T2-NEXT:    [[TMP7:%.*]] = load i16, i16* [[PY_31056]], align 2
+; CHECK-T2-NEXT:    [[ADD_PTR_I906:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -2
+; CHECK-T2-NEXT:    [[SHR_I892:%.*]] = sext i16 [[TMP5]] to i32
+; CHECK-T2-NEXT:    [[SHR1_I893:%.*]] = sext i16 [[TMP6]] to i32
+; CHECK-T2-NEXT:    [[MUL_I894:%.*]] = mul nsw i32 [[SHR1_I893]], [[SHR_I892]]
+; CHECK-T2-NEXT:    [[SHR2_I895:%.*]] = sext i16 [[TMP4]] to i32
+; CHECK-T2-NEXT:    [[SHR4_I897:%.*]] = sext i16 [[TMP7]] to i32
+; CHECK-T2-NEXT:    [[MUL5_I898:%.*]] = mul nsw i32 [[SHR4_I897]], [[SHR2_I895]]
+; CHECK-T2-NEXT:    [[ADD_I899:%.*]] = add i32 [[MUL_I894]], [[SUM_11057]]
+; CHECK-T2-NEXT:    [[ADD6_I900:%.*]] = add i32 [[ADD_I899]], [[MUL5_I898]]
+; CHECK-T2-NEXT:    [[ARRAYIDX_I885:%.*]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 3
+; CHECK-T2-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX_I885]], align 2
+; CHECK-T2-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ADD_PTR_I912]], align 2
+; CHECK-T2-NEXT:    [[ADD_PTR_I890]] = getelementptr inbounds i16, i16* [[PX_31055]], i32 4
+; CHECK-T2-NEXT:    [[ARRAYIDX_I879:%.*]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -1
+; CHECK-T2-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_I879]], align 2
+; CHECK-T2-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ADD_PTR_I906]], align 2
+; CHECK-T2-NEXT:    [[ADD_PTR_I884]] = getelementptr inbounds i16, i16* [[PY_31056]], i32 -4
+; CHECK-T2-NEXT:    [[SHR_I870:%.*]] = sext i16 [[TMP9]] to i32
+; CHECK-T2-NEXT:    [[SHR1_I871:%.*]] = sext i16 [[TMP10]] to i32
+; CHECK-T2-NEXT:    [[MUL_I872:%.*]] = mul nsw i32 [[SHR1_I871]], [[SHR_I870]]
+; CHECK-T2-NEXT:    [[SHR2_I873:%.*]] = sext i16 [[TMP8]] to i32
+; CHECK-T2-NEXT:    [[SHR4_I875:%.*]] = sext i16 [[TMP11]] to i32
+; CHECK-T2-NEXT:    [[MUL5_I876:%.*]] = mul nsw i32 [[SHR4_I875]], [[SHR2_I873]]
+; CHECK-T2-NEXT:    [[ADD_I877:%.*]] = add i32 [[ADD6_I900]], [[MUL_I872]]
+; CHECK-T2-NEXT:    [[ADD6_I878]] = add i32 [[ADD_I877]], [[MUL5_I876]]
+; CHECK-T2-NEXT:    [[DEC30]] = add nsw i32 [[K_11058]], -1
+; CHECK-T2-NEXT:    [[CMP21:%.*]] = icmp eq i32 [[DEC30]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP21]], label [[WHILE_END31_LOOPEXIT:%.*]], label [[WHILE_BODY23]]
+; CHECK-T2:       while.end31.loopexit:
+; CHECK-T2-NEXT:    [[ADD_PTR_I890_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_I890]], [[WHILE_BODY23]] ]
+; CHECK-T2-NEXT:    [[ADD_PTR_I884_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_I884]], [[WHILE_BODY23]] ]
+; CHECK-T2-NEXT:    [[ADD6_I878_LCSSA:%.*]] = phi i32 [ [[ADD6_I878]], [[WHILE_BODY23]] ]
+; CHECK-T2-NEXT:    br label [[WHILE_END31]]
+; CHECK-T2:       while.end31:
+; CHECK-T2-NEXT:    [[PX_3_LCSSA:%.*]] = phi i16* [ [[PSRCB_PSRCA]], [[WHILE_BODY18]] ], [ [[ADD_PTR_I890_LCSSA]], [[WHILE_END31_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[PY_3_LCSSA:%.*]] = phi i16* [ [[PY_21070]], [[WHILE_BODY18]] ], [ [[ADD_PTR_I884_LCSSA]], [[WHILE_END31_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[SUM_1_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_BODY18]] ], [ [[ADD6_I878_LCSSA]], [[WHILE_END31_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[REM:%.*]] = and i32 [[COUNT_11072]], 3
+; CHECK-T2-NEXT:    [[CMP341062:%.*]] = icmp eq i32 [[REM]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP341062]], label [[WHILE_END43]], label [[WHILE_BODY36_PREHEADER:%.*]]
+; CHECK-T2:       while.body36.preheader:
+; CHECK-T2-NEXT:    [[ADD_PTR32:%.*]] = getelementptr inbounds i16, i16* [[PY_3_LCSSA]], i32 1
+; CHECK-T2-NEXT:    br label [[WHILE_BODY36:%.*]]
+; CHECK-T2:       while.body36:
+; CHECK-T2-NEXT:    [[K_21066:%.*]] = phi i32 [ [[DEC42:%.*]], [[WHILE_BODY36]] ], [ [[REM]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[SUM_21065:%.*]] = phi i32 [ [[ADD6_I868:%.*]], [[WHILE_BODY36]] ], [ [[SUM_1_LCSSA]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PY_41064:%.*]] = phi i16* [ [[INCDEC_PTR39:%.*]], [[WHILE_BODY36]] ], [ [[ADD_PTR32]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[PX_41063:%.*]] = phi i16* [ [[INCDEC_PTR37:%.*]], [[WHILE_BODY36]] ], [ [[PX_3_LCSSA]], [[WHILE_BODY36_PREHEADER]] ]
+; CHECK-T2-NEXT:    [[INCDEC_PTR37]] = getelementptr inbounds i16, i16* [[PX_41063]], i32 1
+; CHECK-T2-NEXT:    [[TMP12:%.*]] = load i16, i16* [[PX_41063]], align 2
+; CHECK-T2-NEXT:    [[CONV38:%.*]] = sext i16 [[TMP12]] to i32
+; CHECK-T2-NEXT:    [[INCDEC_PTR39]] = getelementptr inbounds i16, i16* [[PY_41064]], i32 -1
+; CHECK-T2-NEXT:    [[TMP13:%.*]] = load i16, i16* [[PY_41064]], align 2
+; CHECK-T2-NEXT:    [[CONV40:%.*]] = sext i16 [[TMP13]] to i32
+; CHECK-T2-NEXT:    [[MUL_I863:%.*]] = mul nsw i32 [[CONV40]], [[CONV38]]
+; CHECK-T2-NEXT:    [[SHR3_I864:%.*]] = ashr i32 [[CONV38]], 16
+; CHECK-T2-NEXT:    [[SHR4_I865:%.*]] = ashr i32 [[CONV40]], 16
+; CHECK-T2-NEXT:    [[MUL5_I866:%.*]] = mul nsw i32 [[SHR4_I865]], [[SHR3_I864]]
+; CHECK-T2-NEXT:    [[ADD_I867:%.*]] = add i32 [[MUL_I863]], [[SUM_21065]]
+; CHECK-T2-NEXT:    [[ADD6_I868]] = add i32 [[ADD_I867]], [[MUL5_I866]]
+; CHECK-T2-NEXT:    [[DEC42]] = add nsw i32 [[K_21066]], -1
+; CHECK-T2-NEXT:    [[CMP34:%.*]] = icmp eq i32 [[DEC42]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP34]], label [[WHILE_END43_LOOPEXIT:%.*]], label [[WHILE_BODY36]]
+; CHECK-T2:       while.end43.loopexit:
+; CHECK-T2-NEXT:    [[ADD6_I868_LCSSA:%.*]] = phi i32 [ [[ADD6_I868]], [[WHILE_BODY36]] ]
+; CHECK-T2-NEXT:    br label [[WHILE_END43]]
+; CHECK-T2:       while.end43:
+; CHECK-T2-NEXT:    [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_1_LCSSA]], [[WHILE_END31]] ], [ [[ADD6_I868_LCSSA]], [[WHILE_END43_LOOPEXIT]] ]
+; CHECK-T2-NEXT:    [[TMP14:%.*]] = lshr i32 [[SUM_2_LCSSA]], 15
+; CHECK-T2-NEXT:    [[CONV45:%.*]] = trunc i32 [[TMP14]] to i16
+; CHECK-T2-NEXT:    [[INCDEC_PTR46]] = getelementptr inbounds i16, i16* [[POUT_11069]], i32 1
+; CHECK-T2-NEXT:    store i16 [[CONV45]], i16* [[POUT_11069]], align 2
+; CHECK-T2-NEXT:    [[SUB47:%.*]] = add i32 [[COUNT_11072]], -1
+; CHECK-T2-NEXT:    [[ADD_PTR48]] = getelementptr inbounds i16, i16* [[PSRCA_PSRCB]], i32 [[SUB47]]
+; CHECK-T2-NEXT:    [[INC49]] = add i32 [[COUNT_11072]], 1
+; CHECK-T2-NEXT:    [[DEC50]] = add i32 [[BLOCKSIZE1_11071]], -1
+; CHECK-T2-NEXT:    [[CMP16:%.*]] = icmp eq i32 [[DEC50]], 0
+; CHECK-T2-NEXT:    br i1 [[CMP16]], label [[EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY18]]
+; CHECK-T2:       exit.loopexit:
+; CHECK-T2-NEXT:    br label [[EXIT]]
+; CHECK-T2:       exit:
+; CHECK-T2-NEXT:    ret void
+;
+entry:
+  %cmp = icmp ult i32 %srcALen, %srcBLen
+  %srcALen.srcBLen = select i1 %cmp, i32 %srcALen, i32 %srcBLen
+  %pSrcB.pSrcA = select i1 %cmp, i16* %pSrcB, i16* %pSrcA
+  %pSrcA.pSrcB = select i1 %cmp, i16* %pSrcA, i16* %pSrcB
+  %sub = add i32 %srcALen.srcBLen, -1
+  %cmp41080 = icmp eq i32 %sub, 0
+  br i1 %cmp41080, label %while.end13, label %while.cond5.preheader
+
+while.cond5.preheader:                            ; preds = %while.end, %entry
+  %count.01084 = phi i32 [ %inc, %while.end ], [ 1, %entry ]
+  %blockSize1.01083 = phi i32 [ %dec12, %while.end ], [ %sub, %entry ]
+  %py.01082 = phi i16* [ %add.ptr, %while.end ], [ %pSrcA.pSrcB, %entry ]
+  %pOut.01081 = phi i16* [ %incdec.ptr11, %while.end ], [ %pDst, %entry ]
+  br label %while.body7
+
+while.body7:                                      ; preds = %while.body7, %while.cond5.preheader
+  %k.01078 = phi i32 [ %dec, %while.body7 ], [ %count.01084, %while.cond5.preheader ]
+  %sum.01077 = phi i32 [ %add6.i, %while.body7 ], [ 0, %while.cond5.preheader ]
+  %py.11076 = phi i16* [ %incdec.ptr8, %while.body7 ], [ %py.01082, %while.cond5.preheader ]
+  %px.11075 = phi i16* [ %incdec.ptr, %while.body7 ], [ %pSrcB.pSrcA, %while.cond5.preheader ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %px.11075, i32 1
+  %0 = load i16, i16* %px.11075, align 2
+  %conv = sext i16 %0 to i32
+  %incdec.ptr8 = getelementptr inbounds i16, i16* %py.11076, i32 -1
+  %1 = load i16, i16* %py.11076, align 2
+  %conv9 = sext i16 %1 to i32
+  %mul.i = mul nsw i32 %conv9, %conv
+  %shr3.i = ashr i32 %conv, 16
+  %shr4.i = ashr i32 %conv9, 16
+  %mul5.i = mul nsw i32 %shr4.i, %shr3.i
+  %add.i = add i32 %mul.i, %sum.01077
+  %add6.i = add i32 %add.i, %mul5.i
+  %dec = add nsw i32 %k.01078, -1
+  %cmp6 = icmp eq i32 %dec, 0
+  br i1 %cmp6, label %while.end, label %while.body7
+
+while.end:                                        ; preds = %while.body7
+  %2 = lshr i32 %add6.i, 15
+  %conv10 = trunc i32 %2 to i16
+  %incdec.ptr11 = getelementptr inbounds i16, i16* %pOut.01081, i32 1
+  store i16 %conv10, i16* %pOut.01081, align 2
+  %add.ptr = getelementptr inbounds i16, i16* %pSrcA.pSrcB, i32 %count.01084
+  %inc = add nuw nsw i32 %count.01084, 1
+  %dec12 = add i32 %blockSize1.01083, -1
+  %cmp3 = icmp ult i32 %count.01084, 3
+  %cmp4 = icmp ne i32 %dec12, 0
+  %3 = and i1 %cmp4, %cmp3
+  br i1 %3, label %while.cond5.preheader, label %while.end13
+
+while.end13:                                      ; preds = %while.end, %entry
+  %pOut.0.lcssa = phi i16* [ %pDst, %entry ], [ %incdec.ptr11, %while.end ]
+  %py.0.lcssa = phi i16* [ %pSrcA.pSrcB, %entry ], [ %add.ptr, %while.end ]
+  %blockSize1.0.lcssa = phi i32 [ %sub, %entry ], [ %dec12, %while.end ]
+  %count.0.lcssa = phi i32 [ 1, %entry ], [ %inc, %while.end ]
+  %cmp161068 = icmp eq i32 %blockSize1.0.lcssa, 0
+  br i1 %cmp161068, label %exit, label %while.body18.preheader
+
+while.body18.preheader:                           ; preds = %while.end13
+  %add.ptr14 = getelementptr inbounds i16, i16* %py.0.lcssa, i32 -1
+  br label %while.body18
+
+while.body18:                                     ; preds = %while.end43, %while.body18.preheader
+  %count.11072 = phi i32 [ %inc49, %while.end43 ], [ %count.0.lcssa, %while.body18.preheader ]
+  %blockSize1.11071 = phi i32 [ %dec50, %while.end43 ], [ %blockSize1.0.lcssa, %while.body18.preheader ]
+  %py.21070 = phi i16* [ %add.ptr48, %while.end43 ], [ %add.ptr14, %while.body18.preheader ]
+  %pOut.11069 = phi i16* [ %incdec.ptr46, %while.end43 ], [ %pOut.0.lcssa, %while.body18.preheader ]
+  %shr19 = lshr i32 %count.11072, 2
+  %cmp211054 = icmp eq i32 %shr19, 0
+  br i1 %cmp211054, label %while.end31, label %while.body23
+
+while.body23:                                     ; preds = %while.body23, %while.body18
+  %k.11058 = phi i32 [ %dec30, %while.body23 ], [ %shr19, %while.body18 ]
+  %sum.11057 = phi i32 [ %add6.i878, %while.body23 ], [ 0, %while.body18 ]
+  %py.31056 = phi i16* [ %add.ptr.i884, %while.body23 ], [ %py.21070, %while.body18 ]
+  %px.31055 = phi i16* [ %add.ptr.i890, %while.body23 ], [ %pSrcB.pSrcA, %while.body18 ]
+  %arrayidx.i907 = getelementptr inbounds i16, i16* %px.31055, i32 1
+  %4 = load i16, i16* %arrayidx.i907, align 2
+  %5 = load i16, i16* %px.31055, align 2
+  %add.ptr.i912 = getelementptr inbounds i16, i16* %px.31055, i32 2
+  %arrayidx.i901 = getelementptr inbounds i16, i16* %py.31056, i32 1
+  %6 = load i16, i16* %arrayidx.i901, align 2
+  %7 = load i16, i16* %py.31056, align 2
+  %add.ptr.i906 = getelementptr inbounds i16, i16* %py.31056, i32 -2
+  %shr.i892 = sext i16 %5 to i32
+  %shr1.i893 = sext i16 %6 to i32
+  %mul.i894 = mul nsw i32 %shr1.i893, %shr.i892
+  %shr2.i895 = sext i16 %4 to i32
+  %shr4.i897 = sext i16 %7 to i32
+  %mul5.i898 = mul nsw i32 %shr4.i897, %shr2.i895
+  %add.i899 = add i32 %mul.i894, %sum.11057
+  %add6.i900 = add i32 %add.i899, %mul5.i898
+  %arrayidx.i885 = getelementptr inbounds i16, i16* %px.31055, i32 3
+  %8 = load i16, i16* %arrayidx.i885, align 2
+  %9 = load i16, i16* %add.ptr.i912, align 2
+  %add.ptr.i890 = getelementptr inbounds i16, i16* %px.31055, i32 4
+  %arrayidx.i879 = getelementptr inbounds i16, i16* %py.31056, i32 -1
+  %10 = load i16, i16* %arrayidx.i879, align 2
+  %11 = load i16, i16* %add.ptr.i906, align 2
+  %add.ptr.i884 = getelementptr inbounds i16, i16* %py.31056, i32 -4
+  %shr.i870 = sext i16 %9 to i32
+  %shr1.i871 = sext i16 %10 to i32
+  %mul.i872 = mul nsw i32 %shr1.i871, %shr.i870
+  %shr2.i873 = sext i16 %8 to i32
+  %shr4.i875 = sext i16 %11 to i32
+  %mul5.i876 = mul nsw i32 %shr4.i875, %shr2.i873
+  %add.i877 = add i32 %add6.i900, %mul.i872
+  %add6.i878 = add i32 %add.i877, %mul5.i876
+  %dec30 = add nsw i32 %k.11058, -1
+  %cmp21 = icmp eq i32 %dec30, 0
+  br i1 %cmp21, label %while.end31, label %while.body23
+
+while.end31:                                      ; preds = %while.body23, %while.body18
+  %px.3.lcssa = phi i16* [ %pSrcB.pSrcA, %while.body18 ], [ %add.ptr.i890, %while.body23 ]
+  %py.3.lcssa = phi i16* [ %py.21070, %while.body18 ], [ %add.ptr.i884, %while.body23 ]
+  %sum.1.lcssa = phi i32 [ 0, %while.body18 ], [ %add6.i878, %while.body23 ]
+  %rem = and i32 %count.11072, 3
+  %cmp341062 = icmp eq i32 %rem, 0
+  br i1 %cmp341062, label %while.end43, label %while.body36.preheader
+
+while.body36.preheader:                           ; preds = %while.end31
+  %add.ptr32 = getelementptr inbounds i16, i16* %py.3.lcssa, i32 1
+  br label %while.body36
+
+while.body36:                                     ; preds = %while.body36, %while.body36.preheader
+  %k.21066 = phi i32 [ %dec42, %while.body36 ], [ %rem, %while.body36.preheader ]
+  %sum.21065 = phi i32 [ %add6.i868, %while.body36 ], [ %sum.1.lcssa, %while.body36.preheader ]
+  %py.41064 = phi i16* [ %incdec.ptr39, %while.body36 ], [ %add.ptr32, %while.body36.preheader ]
+  %px.41063 = phi i16* [ %incdec.ptr37, %while.body36 ], [ %px.3.lcssa, %while.body36.preheader ]
+  %incdec.ptr37 = getelementptr inbounds i16, i16* %px.41063, i32 1
+  %12 = load i16, i16* %px.41063, align 2
+  %conv38 = sext i16 %12 to i32
+  %incdec.ptr39 = getelementptr inbounds i16, i16* %py.41064, i32 -1
+  %13 = load i16, i16* %py.41064, align 2
+  %conv40 = sext i16 %13 to i32
+  %mul.i863 = mul nsw i32 %conv40, %conv38
+  %shr3.i864 = ashr i32 %conv38, 16
+  %shr4.i865 = ashr i32 %conv40, 16
+  %mul5.i866 = mul nsw i32 %shr4.i865, %shr3.i864
+  %add.i867 = add i32 %mul.i863, %sum.21065
+  %add6.i868 = add i32 %add.i867, %mul5.i866
+  %dec42 = add nsw i32 %k.21066, -1
+  %cmp34 = icmp eq i32 %dec42, 0
+  br i1 %cmp34, label %while.end43, label %while.body36
+
+while.end43:                                      ; preds = %while.body36, %while.end31
+  %sum.2.lcssa = phi i32 [ %sum.1.lcssa, %while.end31 ], [ %add6.i868, %while.body36 ]
+  %14 = lshr i32 %sum.2.lcssa, 15
+  %conv45 = trunc i32 %14 to i16
+  %incdec.ptr46 = getelementptr inbounds i16, i16* %pOut.11069, i32 1
+  store i16 %conv45, i16* %pOut.11069, align 2
+  %sub47 = add i32 %count.11072, -1
+  %add.ptr48 = getelementptr inbounds i16, i16* %pSrcA.pSrcB, i32 %sub47
+  %inc49 = add i32 %count.11072, 1
+  %dec50 = add i32 %blockSize1.11071, -1
+  %cmp16 = icmp eq i32 %dec50, 0
+  br i1 %cmp16, label %exit, label %while.body18
+
+exit:                                             ; preds = %while.end43, %while.end13
+  ret void
+}

diff  --git a/llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll b/llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll
new file mode 100644
index 000000000000..36749a03553e
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/indvar-unroll-imm-cost.ll
@@ -0,0 +1,578 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -indvars -loop-unroll -mtriple=thumbv8m.main %s -S -o - | FileCheck %s
+
+define dso_local arm_aapcscc void @test(i32* nocapture %pDest, i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i32 %blkCnt) local_unnamed_addr #0 {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP88:%.*]] = icmp eq i32 [[BLKCNT:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP88]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_092:%.*]] = phi i32 [ [[INC42:%.*]], [[FOR_END40:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_091:%.*]] = phi i32* [ [[PDEST_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PDEST:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_090:%.*]] = phi i16* [ [[PSRCA_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCA:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_089:%.*]] = phi i16* [ [[PSRCB_ADDR_2_LCSSA:%.*]], [[FOR_END40]] ], [ [[PSRCB:%.*]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[I_092]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[I_092]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 2147483644
+; CHECK-NEXT:    [[CMP272:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[CMP272]], label [[FOR_END:%.*]], label [[FOR_BODY3_PREHEADER:%.*]]
+; CHECK:       for.body3.preheader:
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP3]], 3
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 3
+; CHECK-NEXT:    br i1 [[TMP7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY3_PREHEADER_NEW:%.*]]
+; CHECK:       for.body3.preheader.new:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP3]], [[XTRAITER]]
+; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
+; CHECK:       for.body3:
+; CHECK-NEXT:    [[J_076:%.*]] = phi i32 [ 0, [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD24_3:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_175:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[INCDEC_PTR_3:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_174:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD_PTR_3:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_173:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[ADD_PTR23_3:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY3_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[PSRCA_ADDR_174]], align 2
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP8]] to i32
+; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[PSRCB_ADDR_173]], align 2
+; CHECK-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP9]] to i32
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV5]], [[CONV]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2
+; CHECK-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP10]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2
+; CHECK-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP11]] to i32
+; CHECK-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[CONV9]], [[CONV7]]
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 2
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX11]], align 2
+; CHECK-NEXT:    [[CONV12:%.*]] = sext i16 [[TMP12]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2
+; CHECK-NEXT:    [[CONV14:%.*]] = sext i16 [[TMP13]] to i32
+; CHECK-NEXT:    [[MUL15:%.*]] = mul nsw i32 [[CONV14]], [[CONV12]]
+; CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 3
+; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX17]], align 2
+; CHECK-NEXT:    [[CONV18:%.*]] = sext i16 [[TMP14]] to i32
+; CHECK-NEXT:    [[ADD21:%.*]] = add i32 [[MUL10]], [[MUL]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[ADD21]], [[CONV14]]
+; CHECK-NEXT:    [[ADD16:%.*]] = add i32 [[ADD]], [[MUL15]]
+; CHECK-NEXT:    [[ADD22:%.*]] = add i32 [[ADD16]], [[CONV18]]
+; CHECK-NEXT:    store i32 [[ADD22]], i32* [[PDEST_ADDR_175]], align 4
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175]], i32 1
+; CHECK-NEXT:    [[ADD24:%.*]] = add nuw nsw i32 [[J_076]], 4
+; CHECK-NEXT:    [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1
+; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ADD_PTR]], align 2
+; CHECK-NEXT:    [[CONV_1:%.*]] = sext i16 [[TMP15]] to i32
+; CHECK-NEXT:    [[TMP16:%.*]] = load i16, i16* [[ADD_PTR23]], align 2
+; CHECK-NEXT:    [[CONV5_1:%.*]] = sext i16 [[TMP16]] to i32
+; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[CONV5_1]], [[CONV_1]]
+; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 1
+; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX6_1]], align 2
+; CHECK-NEXT:    [[CONV7_1:%.*]] = sext i16 [[TMP17]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 1
+; CHECK-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX8_1]], align 2
+; CHECK-NEXT:    [[CONV9_1:%.*]] = sext i16 [[TMP18]] to i32
+; CHECK-NEXT:    [[MUL10_1:%.*]] = mul nsw i32 [[CONV9_1]], [[CONV7_1]]
+; CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 2
+; CHECK-NEXT:    [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX11_1]], align 2
+; CHECK-NEXT:    [[CONV12_1:%.*]] = sext i16 [[TMP19]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 3
+; CHECK-NEXT:    [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX13_1]], align 2
+; CHECK-NEXT:    [[CONV14_1:%.*]] = sext i16 [[TMP20]] to i32
+; CHECK-NEXT:    [[MUL15_1:%.*]] = mul nsw i32 [[CONV14_1]], [[CONV12_1]]
+; CHECK-NEXT:    [[ARRAYIDX17_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 3
+; CHECK-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX17_1]], align 2
+; CHECK-NEXT:    [[CONV18_1:%.*]] = sext i16 [[TMP21]] to i32
+; CHECK-NEXT:    [[ADD21_1:%.*]] = add i32 [[MUL10_1]], [[MUL_1]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD21_1]], [[CONV14_1]]
+; CHECK-NEXT:    [[ADD16_1:%.*]] = add i32 [[ADD_1]], [[MUL15_1]]
+; CHECK-NEXT:    [[ADD22_1:%.*]] = add i32 [[ADD16_1]], [[CONV18_1]]
+; CHECK-NEXT:    store i32 [[ADD22_1]], i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR]], i32 1
+; CHECK-NEXT:    [[ADD24_1:%.*]] = add nuw nsw i32 [[ADD24]], 4
+; CHECK-NEXT:    [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1
+; CHECK-NEXT:    [[TMP22:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2
+; CHECK-NEXT:    [[CONV_2:%.*]] = sext i16 [[TMP22]] to i32
+; CHECK-NEXT:    [[TMP23:%.*]] = load i16, i16* [[ADD_PTR23_1]], align 2
+; CHECK-NEXT:    [[CONV5_2:%.*]] = sext i16 [[TMP23]] to i32
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[CONV5_2]], [[CONV_2]]
+; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 1
+; CHECK-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ARRAYIDX6_2]], align 2
+; CHECK-NEXT:    [[CONV7_2:%.*]] = sext i16 [[TMP24]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 1
+; CHECK-NEXT:    [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX8_2]], align 2
+; CHECK-NEXT:    [[CONV9_2:%.*]] = sext i16 [[TMP25]] to i32
+; CHECK-NEXT:    [[MUL10_2:%.*]] = mul nsw i32 [[CONV9_2]], [[CONV7_2]]
+; CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 2
+; CHECK-NEXT:    [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX11_2]], align 2
+; CHECK-NEXT:    [[CONV12_2:%.*]] = sext i16 [[TMP26]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 3
+; CHECK-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX13_2]], align 2
+; CHECK-NEXT:    [[CONV14_2:%.*]] = sext i16 [[TMP27]] to i32
+; CHECK-NEXT:    [[MUL15_2:%.*]] = mul nsw i32 [[CONV14_2]], [[CONV12_2]]
+; CHECK-NEXT:    [[ARRAYIDX17_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 3
+; CHECK-NEXT:    [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX17_2]], align 2
+; CHECK-NEXT:    [[CONV18_2:%.*]] = sext i16 [[TMP28]] to i32
+; CHECK-NEXT:    [[ADD21_2:%.*]] = add i32 [[MUL10_2]], [[MUL_2]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD21_2]], [[CONV14_2]]
+; CHECK-NEXT:    [[ADD16_2:%.*]] = add i32 [[ADD_2]], [[MUL15_2]]
+; CHECK-NEXT:    [[ADD22_2:%.*]] = add i32 [[ADD16_2]], [[CONV18_2]]
+; CHECK-NEXT:    store i32 [[ADD22_2]], i32* [[INCDEC_PTR_1]], align 4
+; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_1]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR_2:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_1]], i32 1
+; CHECK-NEXT:    [[ADD24_2:%.*]] = add nuw nsw i32 [[ADD24_1]], 4
+; CHECK-NEXT:    [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1
+; CHECK-NEXT:    [[TMP29:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2
+; CHECK-NEXT:    [[CONV_3:%.*]] = sext i16 [[TMP29]] to i32
+; CHECK-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ADD_PTR23_2]], align 2
+; CHECK-NEXT:    [[CONV5_3:%.*]] = sext i16 [[TMP30]] to i32
+; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[CONV5_3]], [[CONV_3]]
+; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 1
+; CHECK-NEXT:    [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX6_3]], align 2
+; CHECK-NEXT:    [[CONV7_3:%.*]] = sext i16 [[TMP31]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 1
+; CHECK-NEXT:    [[TMP32:%.*]] = load i16, i16* [[ARRAYIDX8_3]], align 2
+; CHECK-NEXT:    [[CONV9_3:%.*]] = sext i16 [[TMP32]] to i32
+; CHECK-NEXT:    [[MUL10_3:%.*]] = mul nsw i32 [[CONV9_3]], [[CONV7_3]]
+; CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 2
+; CHECK-NEXT:    [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX11_3]], align 2
+; CHECK-NEXT:    [[CONV12_3:%.*]] = sext i16 [[TMP33]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 3
+; CHECK-NEXT:    [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX13_3]], align 2
+; CHECK-NEXT:    [[CONV14_3:%.*]] = sext i16 [[TMP34]] to i32
+; CHECK-NEXT:    [[MUL15_3:%.*]] = mul nsw i32 [[CONV14_3]], [[CONV12_3]]
+; CHECK-NEXT:    [[ARRAYIDX17_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 3
+; CHECK-NEXT:    [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX17_3]], align 2
+; CHECK-NEXT:    [[CONV18_3:%.*]] = sext i16 [[TMP35]] to i32
+; CHECK-NEXT:    [[ADD21_3:%.*]] = add i32 [[MUL10_3]], [[MUL_3]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD21_3]], [[CONV14_3]]
+; CHECK-NEXT:    [[ADD16_3:%.*]] = add i32 [[ADD_3]], [[MUL15_3]]
+; CHECK-NEXT:    [[ADD22_3:%.*]] = add i32 [[ADD16_3]], [[CONV18_3]]
+; CHECK-NEXT:    store i32 [[ADD22_3]], i32* [[INCDEC_PTR_2]], align 4
+; CHECK-NEXT:    [[ADD_PTR_3]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23_3]] = getelementptr inbounds i16, i16* [[ADD_PTR23_2]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR_3]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_2]], i32 1
+; CHECK-NEXT:    [[ADD24_3]] = add nuw nsw i32 [[ADD24_2]], 4
+; CHECK-NEXT:    [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1
+; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp ne i32 [[NITER_NSUB_3]], 0
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_BODY3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]]
+; CHECK:       for.end.loopexit.unr-lcssa.loopexit:
+; CHECK-NEXT:    [[ADD_PTR_LCSSA_PH_PH:%.*]] = phi i16* [ [[ADD_PTR_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[ADD_PTR23_LCSSA_PH_PH:%.*]] = phi i16* [ [[ADD_PTR23_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA_PH_PH:%.*]] = phi i32* [ [[INCDEC_PTR_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[J_076_UNR_PH:%.*]] = phi i32 [ [[ADD24_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_175_UNR_PH:%.*]] = phi i32* [ [[INCDEC_PTR_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_174_UNR_PH:%.*]] = phi i16* [ [[ADD_PTR_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_173_UNR_PH:%.*]] = phi i16* [ [[ADD_PTR23_3]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK:       for.end.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[ADD_PTR_LCSSA_PH:%.*]] = phi i16* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[ADD_PTR23_LCSSA_PH:%.*]] = phi i16* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[ADD_PTR23_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA_PH:%.*]] = phi i32* [ undef, [[FOR_BODY3_PREHEADER]] ], [ [[INCDEC_PTR_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[J_076_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY3_PREHEADER]] ], [ [[J_076_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_175_UNR:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY3_PREHEADER]] ], [ [[PDEST_ADDR_175_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_174_UNR:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY3_PREHEADER]] ], [ [[PSRCA_ADDR_174_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_173_UNR:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY3_PREHEADER]] ], [ [[PSRCB_ADDR_173_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY3_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       for.body3.epil.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY3_EPIL:%.*]]
+; CHECK:       for.body3.epil:
+; CHECK-NEXT:    [[TMP36:%.*]] = load i16, i16* [[PSRCA_ADDR_174_UNR]], align 2
+; CHECK-NEXT:    [[CONV_EPIL:%.*]] = sext i16 [[TMP36]] to i32
+; CHECK-NEXT:    [[TMP37:%.*]] = load i16, i16* [[PSRCB_ADDR_173_UNR]], align 2
+; CHECK-NEXT:    [[CONV5_EPIL:%.*]] = sext i16 [[TMP37]] to i32
+; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul nsw i32 [[CONV5_EPIL]], [[CONV_EPIL]]
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 1
+; CHECK-NEXT:    [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL]], align 2
+; CHECK-NEXT:    [[CONV7_EPIL:%.*]] = sext i16 [[TMP38]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 1
+; CHECK-NEXT:    [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL]], align 2
+; CHECK-NEXT:    [[CONV9_EPIL:%.*]] = sext i16 [[TMP39]] to i32
+; CHECK-NEXT:    [[MUL10_EPIL:%.*]] = mul nsw i32 [[CONV9_EPIL]], [[CONV7_EPIL]]
+; CHECK-NEXT:    [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 2
+; CHECK-NEXT:    [[TMP40:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL]], align 2
+; CHECK-NEXT:    [[CONV12_EPIL:%.*]] = sext i16 [[TMP40]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 3
+; CHECK-NEXT:    [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL]], align 2
+; CHECK-NEXT:    [[CONV14_EPIL:%.*]] = sext i16 [[TMP41]] to i32
+; CHECK-NEXT:    [[MUL15_EPIL:%.*]] = mul nsw i32 [[CONV14_EPIL]], [[CONV12_EPIL]]
+; CHECK-NEXT:    [[ARRAYIDX17_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 3
+; CHECK-NEXT:    [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL]], align 2
+; CHECK-NEXT:    [[CONV18_EPIL:%.*]] = sext i16 [[TMP42]] to i32
+; CHECK-NEXT:    [[ADD21_EPIL:%.*]] = add i32 [[MUL10_EPIL]], [[MUL_EPIL]]
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i32 [[ADD21_EPIL]], [[CONV14_EPIL]]
+; CHECK-NEXT:    [[ADD16_EPIL:%.*]] = add i32 [[ADD_EPIL]], [[MUL15_EPIL]]
+; CHECK-NEXT:    [[ADD22_EPIL:%.*]] = add i32 [[ADD16_EPIL]], [[CONV18_EPIL]]
+; CHECK-NEXT:    store i32 [[ADD22_EPIL]], i32* [[PDEST_ADDR_175_UNR]], align 4
+; CHECK-NEXT:    [[ADD_PTR_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_174_UNR]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23_EPIL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_173_UNR]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR_EPIL:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_175_UNR]], i32 1
+; CHECK-NEXT:    [[ADD24_EPIL:%.*]] = add nuw nsw i32 [[J_076_UNR]], 4
+; CHECK-NEXT:    [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY3_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]]
+; CHECK:       for.end.loopexit.epilog-lcssa:
+; CHECK-NEXT:    [[ADD_PTR_LCSSA_PH1:%.*]] = phi i16* [ [[ADD_PTR_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[ADD_PTR_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[ADD_PTR_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2:%.*]] ]
+; CHECK-NEXT:    [[ADD_PTR23_LCSSA_PH2:%.*]] = phi i16* [ [[ADD_PTR23_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[ADD_PTR23_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[ADD_PTR23_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2]] ]
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA_PH3:%.*]] = phi i32* [ [[INCDEC_PTR_EPIL]], [[FOR_BODY3_EPIL]] ], [ [[INCDEC_PTR_EPIL_1:%.*]], [[FOR_BODY3_EPIL_1]] ], [ [[INCDEC_PTR_EPIL_2:%.*]], [[FOR_BODY3_EPIL_2]] ]
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[ADD_PTR_LCSSA:%.*]] = phi i16* [ [[ADD_PTR_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_PTR_LCSSA_PH1]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[ADD_PTR23_LCSSA:%.*]] = phi i16* [ [[ADD_PTR23_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_PTR23_LCSSA_PH2]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i32* [ [[INCDEC_PTR_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[INCDEC_PTR_LCSSA_PH3]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[PSRCB_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCB_ADDR_089]], [[FOR_BODY]] ], [ [[ADD_PTR23_LCSSA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_1_LCSSA:%.*]] = phi i16* [ [[PSRCA_ADDR_090]], [[FOR_BODY]] ], [ [[ADD_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_1_LCSSA:%.*]] = phi i32* [ [[PDEST_ADDR_091]], [[FOR_BODY]] ], [ [[INCDEC_PTR_LCSSA]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[J_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[TMP6]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[REM:%.*]] = and i32 [[TMP4]], 3
+; CHECK-NEXT:    [[ADD25:%.*]] = or i32 [[J_0_LCSSA]], [[REM]]
+; CHECK-NEXT:    [[CMP2780:%.*]] = icmp ugt i32 [[ADD25]], [[J_0_LCSSA]]
+; CHECK-NEXT:    br i1 [[CMP2780]], label [[FOR_BODY29_PREHEADER:%.*]], label [[FOR_END40]]
+; CHECK:       for.body29.preheader:
+; CHECK-NEXT:    [[TMP43:%.*]] = sub nsw i32 [[ADD25]], [[J_0_LCSSA]]
+; CHECK-NEXT:    [[TMP44:%.*]] = sub i32 [[ADD25]], [[J_0_LCSSA]]
+; CHECK-NEXT:    [[TMP45:%.*]] = add i32 [[ADD25]], -1
+; CHECK-NEXT:    [[TMP46:%.*]] = sub i32 [[TMP45]], [[J_0_LCSSA]]
+; CHECK-NEXT:    [[XTRAITER4:%.*]] = and i32 [[TMP44]], 3
+; CHECK-NEXT:    [[LCMP_MOD5:%.*]] = icmp ne i32 [[XTRAITER4]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD5]], label [[FOR_BODY29_PROL_PREHEADER:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT:%.*]]
+; CHECK:       for.body29.prol.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY29_PROL:%.*]]
+; CHECK:       for.body29.prol:
+; CHECK-NEXT:    [[ARRAYIDX30_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[J_0_LCSSA]]
+; CHECK-NEXT:    [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX30_PROL]], align 2
+; CHECK-NEXT:    [[CONV31_PROL:%.*]] = sext i16 [[TMP47]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[J_0_LCSSA]]
+; CHECK-NEXT:    [[TMP48:%.*]] = load i16, i16* [[ARRAYIDX32_PROL]], align 2
+; CHECK-NEXT:    [[CONV33_PROL:%.*]] = sext i16 [[TMP48]] to i32
+; CHECK-NEXT:    [[MUL34_PROL:%.*]] = mul nsw i32 [[CONV33_PROL]], [[CONV31_PROL]]
+; CHECK-NEXT:    [[TMP49:%.*]] = load i32, i32* [[PDEST_ADDR_1_LCSSA]], align 4
+; CHECK-NEXT:    [[ADD35_PROL:%.*]] = add nsw i32 [[MUL34_PROL]], [[TMP49]]
+; CHECK-NEXT:    store i32 [[ADD35_PROL]], i32* [[PDEST_ADDR_1_LCSSA]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37_PROL:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38_PROL:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 1
+; CHECK-NEXT:    [[INC_PROL:%.*]] = add nuw i32 [[J_0_LCSSA]], 1
+; CHECK-NEXT:    [[PROL_ITER_SUB:%.*]] = sub i32 [[XTRAITER4]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP]], label [[FOR_BODY29_PROL_1:%.*]], label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA:%.*]]
+; CHECK:       for.body29.prol.loopexit.unr-lcssa:
+; CHECK-NEXT:    [[J_184_UNR_PH:%.*]] = phi i32 [ [[INC_PROL]], [[FOR_BODY29_PROL]] ], [ [[INC_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INC_PROL_2:%.*]], [[FOR_BODY29_PROL_2:%.*]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_283_UNR_PH:%.*]] = phi i32* [ [[INCDEC_PTR38_PROL]], [[FOR_BODY29_PROL]] ], [ [[INCDEC_PTR38_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INCDEC_PTR38_PROL_2:%.*]], [[FOR_BODY29_PROL_2]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_282_UNR_PH:%.*]] = phi i16* [ [[INCDEC_PTR36_PROL]], [[FOR_BODY29_PROL]] ], [ [[INCDEC_PTR36_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INCDEC_PTR36_PROL_2:%.*]], [[FOR_BODY29_PROL_2]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_281_UNR_PH:%.*]] = phi i16* [ [[INCDEC_PTR37_PROL]], [[FOR_BODY29_PROL]] ], [ [[INCDEC_PTR37_PROL_1:%.*]], [[FOR_BODY29_PROL_1]] ], [ [[INCDEC_PTR37_PROL_2:%.*]], [[FOR_BODY29_PROL_2]] ]
+; CHECK-NEXT:    br label [[FOR_BODY29_PROL_LOOPEXIT]]
+; CHECK:       for.body29.prol.loopexit:
+; CHECK-NEXT:    [[J_184_UNR:%.*]] = phi i32 [ [[J_0_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[J_184_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_283_UNR:%.*]] = phi i32* [ [[PDEST_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PDEST_ADDR_283_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_282_UNR:%.*]] = phi i16* [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PSRCA_ADDR_282_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_281_UNR:%.*]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_BODY29_PREHEADER]] ], [ [[PSRCB_ADDR_281_UNR_PH]], [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TMP50:%.*]] = icmp ult i32 [[TMP46]], 3
+; CHECK-NEXT:    br i1 [[TMP50]], label [[FOR_END40_LOOPEXIT:%.*]], label [[FOR_BODY29_PREHEADER_NEW:%.*]]
+; CHECK:       for.body29.preheader.new:
+; CHECK-NEXT:    br label [[FOR_BODY29:%.*]]
+; CHECK:       for.body29:
+; CHECK-NEXT:    [[J_184:%.*]] = phi i32 [ [[J_184_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY29]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_283:%.*]] = phi i32* [ [[PDEST_ADDR_283_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR38_3:%.*]], [[FOR_BODY29]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_282:%.*]] = phi i16* [ [[PSRCA_ADDR_282_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR36_3:%.*]], [[FOR_BODY29]] ]
+; CHECK-NEXT:    [[PSRCB_ADDR_281:%.*]] = phi i16* [ [[PSRCB_ADDR_281_UNR]], [[FOR_BODY29_PREHEADER_NEW]] ], [ [[INCDEC_PTR37_3:%.*]], [[FOR_BODY29]] ]
+; CHECK-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 [[J_184]]
+; CHECK-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX30]], align 2
+; CHECK-NEXT:    [[CONV31:%.*]] = sext i16 [[TMP51]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 [[J_184]]
+; CHECK-NEXT:    [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2
+; CHECK-NEXT:    [[CONV33:%.*]] = sext i16 [[TMP52]] to i32
+; CHECK-NEXT:    [[MUL34:%.*]] = mul nsw i32 [[CONV33]], [[CONV31]]
+; CHECK-NEXT:    [[TMP53:%.*]] = load i32, i32* [[PDEST_ADDR_283]], align 4
+; CHECK-NEXT:    [[ADD35:%.*]] = add nsw i32 [[MUL34]], [[TMP53]]
+; CHECK-NEXT:    store i32 [[ADD35]], i32* [[PDEST_ADDR_283]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36:%.*]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_282]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37:%.*]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_281]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38:%.*]] = getelementptr inbounds i32, i32* [[PDEST_ADDR_283]], i32 1
+; CHECK-NEXT:    [[INC:%.*]] = add nuw i32 [[J_184]], 1
+; CHECK-NEXT:    [[ARRAYIDX30_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX30_1]], align 2
+; CHECK-NEXT:    [[CONV31_1:%.*]] = sext i16 [[TMP54]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37]], i32 [[INC]]
+; CHECK-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX32_1]], align 2
+; CHECK-NEXT:    [[CONV33_1:%.*]] = sext i16 [[TMP55]] to i32
+; CHECK-NEXT:    [[MUL34_1:%.*]] = mul nsw i32 [[CONV33_1]], [[CONV31_1]]
+; CHECK-NEXT:    [[TMP56:%.*]] = load i32, i32* [[INCDEC_PTR38]], align 4
+; CHECK-NEXT:    [[ADD35_1:%.*]] = add nsw i32 [[MUL34_1]], [[TMP56]]
+; CHECK-NEXT:    store i32 [[ADD35_1]], i32* [[INCDEC_PTR38]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38_1:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38]], i32 1
+; CHECK-NEXT:    [[INC_1:%.*]] = add nuw i32 [[INC]], 1
+; CHECK-NEXT:    [[ARRAYIDX30_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_1]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX30_2]], align 2
+; CHECK-NEXT:    [[CONV31_2:%.*]] = sext i16 [[TMP57]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_1]], i32 [[INC_1]]
+; CHECK-NEXT:    [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX32_2]], align 2
+; CHECK-NEXT:    [[CONV33_2:%.*]] = sext i16 [[TMP58]] to i32
+; CHECK-NEXT:    [[MUL34_2:%.*]] = mul nsw i32 [[CONV33_2]], [[CONV31_2]]
+; CHECK-NEXT:    [[TMP59:%.*]] = load i32, i32* [[INCDEC_PTR38_1]], align 4
+; CHECK-NEXT:    [[ADD35_2:%.*]] = add nsw i32 [[MUL34_2]], [[TMP59]]
+; CHECK-NEXT:    store i32 [[ADD35_2]], i32* [[INCDEC_PTR38_1]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_1]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_1]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38_2:%.*]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_1]], i32 1
+; CHECK-NEXT:    [[INC_2:%.*]] = add nuw i32 [[INC_1]], 1
+; CHECK-NEXT:    [[ARRAYIDX30_3:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_2]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX30_3]], align 2
+; CHECK-NEXT:    [[CONV31_3:%.*]] = sext i16 [[TMP60]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32_3:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_2]], i32 [[INC_2]]
+; CHECK-NEXT:    [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX32_3]], align 2
+; CHECK-NEXT:    [[CONV33_3:%.*]] = sext i16 [[TMP61]] to i32
+; CHECK-NEXT:    [[MUL34_3:%.*]] = mul nsw i32 [[CONV33_3]], [[CONV31_3]]
+; CHECK-NEXT:    [[TMP62:%.*]] = load i32, i32* [[INCDEC_PTR38_2]], align 4
+; CHECK-NEXT:    [[ADD35_3:%.*]] = add nsw i32 [[MUL34_3]], [[TMP62]]
+; CHECK-NEXT:    store i32 [[ADD35_3]], i32* [[INCDEC_PTR38_2]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36_3]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_2]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37_3]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_2]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38_3]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_2]], i32 1
+; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[INC_2]], 1
+; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[ADD25]]
+; CHECK-NEXT:    br i1 [[EXITCOND_3]], label [[FOR_END40_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY29]]
+; CHECK:       for.end40.loopexit.unr-lcssa:
+; CHECK-NEXT:    br label [[FOR_END40_LOOPEXIT]]
+; CHECK:       for.end40.loopexit:
+; CHECK-NEXT:    [[SCEVGEP93:%.*]] = getelementptr i16, i16* [[PSRCB_ADDR_1_LCSSA]], i32 [[TMP43]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[PSRCA_ADDR_1_LCSSA]], i32 [[TMP43]]
+; CHECK-NEXT:    [[SCEVGEP94:%.*]] = getelementptr i32, i32* [[PDEST_ADDR_1_LCSSA]], i32 [[TMP43]]
+; CHECK-NEXT:    br label [[FOR_END40]]
+; CHECK:       for.end40:
+; CHECK-NEXT:    [[PSRCB_ADDR_2_LCSSA]] = phi i16* [ [[PSRCB_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP93]], [[FOR_END40_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PSRCA_ADDR_2_LCSSA]] = phi i16* [ [[PSRCA_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP]], [[FOR_END40_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PDEST_ADDR_2_LCSSA]] = phi i32* [ [[PDEST_ADDR_1_LCSSA]], [[FOR_END]] ], [ [[SCEVGEP94]], [[FOR_END40_LOOPEXIT]] ]
+; CHECK-NEXT:    [[INC42]] = add nuw i32 [[I_092]], 1
+; CHECK-NEXT:    [[EXITCOND95:%.*]] = icmp eq i32 [[INC42]], [[BLKCNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND95]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       for.body3.epil.1:
+; CHECK-NEXT:    [[TMP63:%.*]] = load i16, i16* [[ADD_PTR_EPIL]], align 2
+; CHECK-NEXT:    [[CONV_EPIL_1:%.*]] = sext i16 [[TMP63]] to i32
+; CHECK-NEXT:    [[TMP64:%.*]] = load i16, i16* [[ADD_PTR23_EPIL]], align 2
+; CHECK-NEXT:    [[CONV5_EPIL_1:%.*]] = sext i16 [[TMP64]] to i32
+; CHECK-NEXT:    [[MUL_EPIL_1:%.*]] = mul nsw i32 [[CONV5_EPIL_1]], [[CONV_EPIL_1]]
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 1
+; CHECK-NEXT:    [[TMP65:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV7_EPIL_1:%.*]] = sext i16 [[TMP65]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 1
+; CHECK-NEXT:    [[TMP66:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV9_EPIL_1:%.*]] = sext i16 [[TMP66]] to i32
+; CHECK-NEXT:    [[MUL10_EPIL_1:%.*]] = mul nsw i32 [[CONV9_EPIL_1]], [[CONV7_EPIL_1]]
+; CHECK-NEXT:    [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 2
+; CHECK-NEXT:    [[TMP67:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV12_EPIL_1:%.*]] = sext i16 [[TMP67]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 3
+; CHECK-NEXT:    [[TMP68:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV14_EPIL_1:%.*]] = sext i16 [[TMP68]] to i32
+; CHECK-NEXT:    [[MUL15_EPIL_1:%.*]] = mul nsw i32 [[CONV14_EPIL_1]], [[CONV12_EPIL_1]]
+; CHECK-NEXT:    [[ARRAYIDX17_EPIL_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 3
+; CHECK-NEXT:    [[TMP69:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV18_EPIL_1:%.*]] = sext i16 [[TMP69]] to i32
+; CHECK-NEXT:    [[ADD21_EPIL_1:%.*]] = add i32 [[MUL10_EPIL_1]], [[MUL_EPIL_1]]
+; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add i32 [[ADD21_EPIL_1]], [[CONV14_EPIL_1]]
+; CHECK-NEXT:    [[ADD16_EPIL_1:%.*]] = add i32 [[ADD_EPIL_1]], [[MUL15_EPIL_1]]
+; CHECK-NEXT:    [[ADD22_EPIL_1:%.*]] = add i32 [[ADD16_EPIL_1]], [[CONV18_EPIL_1]]
+; CHECK-NEXT:    store i32 [[ADD22_EPIL_1]], i32* [[INCDEC_PTR_EPIL]], align 4
+; CHECK-NEXT:    [[ADD_PTR_EPIL_1]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23_EPIL_1]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR_EPIL_1]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_EPIL]], i32 1
+; CHECK-NEXT:    [[ADD24_EPIL_1:%.*]] = add nuw nsw i32 [[ADD24_EPIL]], 4
+; CHECK-NEXT:    [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1
+; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0
+; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY3_EPIL_2]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.body3.epil.2:
+; CHECK-NEXT:    [[TMP70:%.*]] = load i16, i16* [[ADD_PTR_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV_EPIL_2:%.*]] = sext i16 [[TMP70]] to i32
+; CHECK-NEXT:    [[TMP71:%.*]] = load i16, i16* [[ADD_PTR23_EPIL_1]], align 2
+; CHECK-NEXT:    [[CONV5_EPIL_2:%.*]] = sext i16 [[TMP71]] to i32
+; CHECK-NEXT:    [[MUL_EPIL_2:%.*]] = mul nsw i32 [[CONV5_EPIL_2]], [[CONV_EPIL_2]]
+; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 1
+; CHECK-NEXT:    [[TMP72:%.*]] = load i16, i16* [[ARRAYIDX6_EPIL_2]], align 2
+; CHECK-NEXT:    [[CONV7_EPIL_2:%.*]] = sext i16 [[TMP72]] to i32
+; CHECK-NEXT:    [[ARRAYIDX8_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 1
+; CHECK-NEXT:    [[TMP73:%.*]] = load i16, i16* [[ARRAYIDX8_EPIL_2]], align 2
+; CHECK-NEXT:    [[CONV9_EPIL_2:%.*]] = sext i16 [[TMP73]] to i32
+; CHECK-NEXT:    [[MUL10_EPIL_2:%.*]] = mul nsw i32 [[CONV9_EPIL_2]], [[CONV7_EPIL_2]]
+; CHECK-NEXT:    [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 2
+; CHECK-NEXT:    [[TMP74:%.*]] = load i16, i16* [[ARRAYIDX11_EPIL_2]], align 2
+; CHECK-NEXT:    [[CONV12_EPIL_2:%.*]] = sext i16 [[TMP74]] to i32
+; CHECK-NEXT:    [[ARRAYIDX13_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 3
+; CHECK-NEXT:    [[TMP75:%.*]] = load i16, i16* [[ARRAYIDX13_EPIL_2]], align 2
+; CHECK-NEXT:    [[CONV14_EPIL_2:%.*]] = sext i16 [[TMP75]] to i32
+; CHECK-NEXT:    [[MUL15_EPIL_2:%.*]] = mul nsw i32 [[CONV14_EPIL_2]], [[CONV12_EPIL_2]]
+; CHECK-NEXT:    [[ARRAYIDX17_EPIL_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 3
+; CHECK-NEXT:    [[TMP76:%.*]] = load i16, i16* [[ARRAYIDX17_EPIL_2]], align 2
+; CHECK-NEXT:    [[CONV18_EPIL_2:%.*]] = sext i16 [[TMP76]] to i32
+; CHECK-NEXT:    [[ADD21_EPIL_2:%.*]] = add i32 [[MUL10_EPIL_2]], [[MUL_EPIL_2]]
+; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add i32 [[ADD21_EPIL_2]], [[CONV14_EPIL_2]]
+; CHECK-NEXT:    [[ADD16_EPIL_2:%.*]] = add i32 [[ADD_EPIL_2]], [[MUL15_EPIL_2]]
+; CHECK-NEXT:    [[ADD22_EPIL_2:%.*]] = add i32 [[ADD16_EPIL_2]], [[CONV18_EPIL_2]]
+; CHECK-NEXT:    store i32 [[ADD22_EPIL_2]], i32* [[INCDEC_PTR_EPIL_1]], align 4
+; CHECK-NEXT:    [[ADD_PTR_EPIL_2]] = getelementptr inbounds i16, i16* [[ADD_PTR_EPIL_1]], i32 4
+; CHECK-NEXT:    [[ADD_PTR23_EPIL_2]] = getelementptr inbounds i16, i16* [[ADD_PTR23_EPIL_1]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR_EPIL_2]] = getelementptr inbounds i32, i32* [[INCDEC_PTR_EPIL_1]], i32 1
+; CHECK-NEXT:    [[ADD24_EPIL_2:%.*]] = add nuw nsw i32 [[ADD24_EPIL_1]], 4
+; CHECK-NEXT:    [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1
+; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
+; CHECK:       for.body29.prol.1:
+; CHECK-NEXT:    [[ARRAYIDX30_PROL_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL]], i32 [[INC_PROL]]
+; CHECK-NEXT:    [[TMP77:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_1]], align 2
+; CHECK-NEXT:    [[CONV31_PROL_1:%.*]] = sext i16 [[TMP77]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32_PROL_1:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL]], i32 [[INC_PROL]]
+; CHECK-NEXT:    [[TMP78:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_1]], align 2
+; CHECK-NEXT:    [[CONV33_PROL_1:%.*]] = sext i16 [[TMP78]] to i32
+; CHECK-NEXT:    [[MUL34_PROL_1:%.*]] = mul nsw i32 [[CONV33_PROL_1]], [[CONV31_PROL_1]]
+; CHECK-NEXT:    [[TMP79:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL]], align 4
+; CHECK-NEXT:    [[ADD35_PROL_1:%.*]] = add nsw i32 [[MUL34_PROL_1]], [[TMP79]]
+; CHECK-NEXT:    store i32 [[ADD35_PROL_1]], i32* [[INCDEC_PTR38_PROL]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36_PROL_1]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37_PROL_1]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38_PROL_1]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_PROL]], i32 1
+; CHECK-NEXT:    [[INC_PROL_1]] = add nuw i32 [[INC_PROL]], 1
+; CHECK-NEXT:    [[PROL_ITER_SUB_1:%.*]] = sub i32 [[PROL_ITER_SUB]], 1
+; CHECK-NEXT:    [[PROL_ITER_CMP_1:%.*]] = icmp ne i32 [[PROL_ITER_SUB_1]], 0
+; CHECK-NEXT:    br i1 [[PROL_ITER_CMP_1]], label [[FOR_BODY29_PROL_2]], label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]]
+; CHECK:       for.body29.prol.2:
+; CHECK-NEXT:    [[ARRAYIDX30_PROL_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL_1]], i32 [[INC_PROL_1]]
+; CHECK-NEXT:    [[TMP80:%.*]] = load i16, i16* [[ARRAYIDX30_PROL_2]], align 2
+; CHECK-NEXT:    [[CONV31_PROL_2:%.*]] = sext i16 [[TMP80]] to i32
+; CHECK-NEXT:    [[ARRAYIDX32_PROL_2:%.*]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL_1]], i32 [[INC_PROL_1]]
+; CHECK-NEXT:    [[TMP81:%.*]] = load i16, i16* [[ARRAYIDX32_PROL_2]], align 2
+; CHECK-NEXT:    [[CONV33_PROL_2:%.*]] = sext i16 [[TMP81]] to i32
+; CHECK-NEXT:    [[MUL34_PROL_2:%.*]] = mul nsw i32 [[CONV33_PROL_2]], [[CONV31_PROL_2]]
+; CHECK-NEXT:    [[TMP82:%.*]] = load i32, i32* [[INCDEC_PTR38_PROL_1]], align 4
+; CHECK-NEXT:    [[ADD35_PROL_2:%.*]] = add nsw i32 [[MUL34_PROL_2]], [[TMP82]]
+; CHECK-NEXT:    store i32 [[ADD35_PROL_2]], i32* [[INCDEC_PTR38_PROL_1]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR36_PROL_2]] = getelementptr inbounds i16, i16* [[INCDEC_PTR36_PROL_1]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR37_PROL_2]] = getelementptr inbounds i16, i16* [[INCDEC_PTR37_PROL_1]], i32 1
+; CHECK-NEXT:    [[INCDEC_PTR38_PROL_2]] = getelementptr inbounds i32, i32* [[INCDEC_PTR38_PROL_1]], i32 1
+; CHECK-NEXT:    [[INC_PROL_2]] = add nuw i32 [[INC_PROL_1]], 1
+; CHECK-NEXT:    [[PROL_ITER_SUB_2:%.*]] = sub i32 [[PROL_ITER_SUB_1]], 1
+; CHECK-NEXT:    br label [[FOR_BODY29_PROL_LOOPEXIT_UNR_LCSSA]]
+;
+entry:
+  %cmp88 = icmp eq i32 %blkCnt, 0
+  br i1 %cmp88, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.end40, %entry
+  ret void
+
+for.body:                                         ; preds = %for.end40, %entry
+  %i.092 = phi i32 [ %inc42, %for.end40 ], [ 0, %entry ]
+  %pDest.addr.091 = phi i32* [ %pDest.addr.2.lcssa, %for.end40 ], [ %pDest, %entry ]
+  %pSrcA.addr.090 = phi i16* [ %pSrcA.addr.2.lcssa, %for.end40 ], [ %pSrcA, %entry ]
+  %pSrcB.addr.089 = phi i16* [ %pSrcB.addr.2.lcssa, %for.end40 ], [ %pSrcB, %entry ]
+  %0 = lshr i32 %i.092, 2
+  %1 = add nuw nsw i32 %0, 3
+  %2 = and i32 %1, 2147483644
+  %cmp272 = icmp eq i32 %0, 0
+  br i1 %cmp272, label %for.end, label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body
+  %j.076 = phi i32 [ %add24, %for.body3 ], [ 0, %for.body ]
+  %pDest.addr.175 = phi i32* [ %incdec.ptr, %for.body3 ], [ %pDest.addr.091, %for.body ]
+  %pSrcA.addr.174 = phi i16* [ %add.ptr, %for.body3 ], [ %pSrcA.addr.090, %for.body ]
+  %pSrcB.addr.173 = phi i16* [ %add.ptr23, %for.body3 ], [ %pSrcB.addr.089, %for.body ]
+  %3 = load i16, i16* %pSrcA.addr.174, align 2
+  %conv = sext i16 %3 to i32
+  %4 = load i16, i16* %pSrcB.addr.173, align 2
+  %conv5 = sext i16 %4 to i32
+  %mul = mul nsw i32 %conv5, %conv
+  %arrayidx6 = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 1
+  %5 = load i16, i16* %arrayidx6, align 2
+  %conv7 = sext i16 %5 to i32
+  %arrayidx8 = getelementptr inbounds i16, i16* %pSrcB.addr.173, i32 1
+  %6 = load i16, i16* %arrayidx8, align 2
+  %conv9 = sext i16 %6 to i32
+  %mul10 = mul nsw i32 %conv9, %conv7
+  %arrayidx11 = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 2
+  %7 = load i16, i16* %arrayidx11, align 2
+  %conv12 = sext i16 %7 to i32
+  %arrayidx13 = getelementptr inbounds i16, i16* %pSrcB.addr.173, i32 3
+  %8 = load i16, i16* %arrayidx13, align 2
+  %conv14 = sext i16 %8 to i32
+  %mul15 = mul nsw i32 %conv14, %conv12
+  %arrayidx17 = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 3
+  %9 = load i16, i16* %arrayidx17, align 2
+  %conv18 = sext i16 %9 to i32
+  %add21 = add i32 %mul10, %mul
+  %add = add i32 %add21, %conv14
+  %add16 = add i32 %add, %mul15
+  %add22 = add i32 %add16, %conv18
+  store i32 %add22, i32* %pDest.addr.175, align 4
+  %add.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.174, i32 4
+  %add.ptr23 = getelementptr inbounds i16, i16* %pSrcB.addr.173, i32 4
+  %incdec.ptr = getelementptr inbounds i32, i32* %pDest.addr.175, i32 1
+  %add24 = add nuw nsw i32 %j.076, 4
+  %cmp2 = icmp ult i32 %add24, %0
+  br i1 %cmp2, label %for.body3, label %for.end
+
+for.end:                                          ; preds = %for.body3, %for.body
+  %pSrcB.addr.1.lcssa = phi i16* [ %pSrcB.addr.089, %for.body ], [ %add.ptr23, %for.body3 ]
+  %pSrcA.addr.1.lcssa = phi i16* [ %pSrcA.addr.090, %for.body ], [ %add.ptr, %for.body3 ]
+  %pDest.addr.1.lcssa = phi i32* [ %pDest.addr.091, %for.body ], [ %incdec.ptr, %for.body3 ]
+  %j.0.lcssa = phi i32 [ 0, %for.body ], [ %2, %for.body3 ]
+  %rem = and i32 %0, 3
+  %add25 = or i32 %j.0.lcssa, %rem
+  %cmp2780 = icmp ugt i32 %add25, %j.0.lcssa
+  br i1 %cmp2780, label %for.body29.preheader, label %for.end40
+
+for.body29.preheader:                             ; preds = %for.end
+  %10 = sub nsw i32 %add25, %j.0.lcssa
+  %scevgep93 = getelementptr i16, i16* %pSrcB.addr.1.lcssa, i32 %10
+  br label %for.body29
+
+for.body29:                                       ; preds = %for.body29, %for.body29.preheader
+  %j.184 = phi i32 [ %inc, %for.body29 ], [ %j.0.lcssa, %for.body29.preheader ]
+  %pDest.addr.283 = phi i32* [ %incdec.ptr38, %for.body29 ], [ %pDest.addr.1.lcssa, %for.body29.preheader ]
+  %pSrcA.addr.282 = phi i16* [ %incdec.ptr36, %for.body29 ], [ %pSrcA.addr.1.lcssa, %for.body29.preheader ]
+  %pSrcB.addr.281 = phi i16* [ %incdec.ptr37, %for.body29 ], [ %pSrcB.addr.1.lcssa, %for.body29.preheader ]
+  %arrayidx30 = getelementptr inbounds i16, i16* %pSrcA.addr.282, i32 %j.184
+  %11 = load i16, i16* %arrayidx30, align 2
+  %conv31 = sext i16 %11 to i32
+  %arrayidx32 = getelementptr inbounds i16, i16* %pSrcB.addr.281, i32 %j.184
+  %12 = load i16, i16* %arrayidx32, align 2
+  %conv33 = sext i16 %12 to i32
+  %mul34 = mul nsw i32 %conv33, %conv31
+  %13 = load i32, i32* %pDest.addr.283, align 4
+  %add35 = add nsw i32 %mul34, %13
+  store i32 %add35, i32* %pDest.addr.283, align 4
+  %incdec.ptr36 = getelementptr inbounds i16, i16* %pSrcA.addr.282, i32 1
+  %incdec.ptr37 = getelementptr inbounds i16, i16* %pSrcB.addr.281, i32 1
+  %incdec.ptr38 = getelementptr inbounds i32, i32* %pDest.addr.283, i32 1
+  %inc = add nuw i32 %j.184, 1
+  %exitcond = icmp eq i32 %inc, %add25
+  br i1 %exitcond, label %for.end40.loopexit, label %for.body29
+
+for.end40.loopexit:                               ; preds = %for.body29
+  %scevgep = getelementptr i16, i16* %pSrcA.addr.1.lcssa, i32 %10
+  %scevgep94 = getelementptr i32, i32* %pDest.addr.1.lcssa, i32 %10
+  br label %for.end40
+
+for.end40:                                        ; preds = %for.end40.loopexit, %for.end
+  %pSrcB.addr.2.lcssa = phi i16* [ %pSrcB.addr.1.lcssa, %for.end ], [ %scevgep93, %for.end40.loopexit ]
+  %pSrcA.addr.2.lcssa = phi i16* [ %pSrcA.addr.1.lcssa, %for.end ], [ %scevgep, %for.end40.loopexit ]
+  %pDest.addr.2.lcssa = phi i32* [ %pDest.addr.1.lcssa, %for.end ], [ %scevgep94, %for.end40.loopexit ]
+  %inc42 = add nuw i32 %i.092, 1
+  %exitcond95 = icmp eq i32 %inc42, %blkCnt
+  br i1 %exitcond95, label %for.cond.cleanup, label %for.body
+}


        


More information about the llvm-commits mailing list