[llvm] r328023 - [Hexagon] Add a few more lit tests, NFC

Tue Apr 10 20:48:56 PDT 2018

Hello Krzysztof,

The LLVM :: CodeGen/Hexagon/late_instr.ll test has failed on the
llvm-clang-x86_64-expensive-checks-win buildbot:
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/8989/steps/test-check-all/logs/stdio

Could you take care of this, please?

Log File contents

FAIL: LLVM :: CodeGen/Hexagon/late_instr.ll (20125 of 37095)
******************** TEST 'LLVM :: CodeGen/Hexagon/late_instr.ll' FAILED ********************
Script:
--
C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\build\bin\llc.EXE
-march=hexagon -disable-hsdr <
C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\llvm\test\CodeGen\Hexagon\late_instr.ll
| C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\build\bin\FileCheck.EXE
C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\llvm\test\CodeGen\Hexagon\late_instr.ll
--
Exit Code: 1

Command Output (stdout):
--
$ "C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\build\bin\llc.EXE"
"-march=hexagon" "-disable-hsdr"
$ "C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\build\bin\FileCheck.EXE"
"C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\llvm\test\CodeGen\Hexagon\late_instr.ll"
# command stderr:
<stdin>:196:2: error: CHECK-NOT: string occurred!

 }

 ^

C:\ps4-buildslave2\llvm-clang-x86_64-expensive-checks-win\llvm\test\CodeGen\Hexagon\late_instr.ll:6:14:
note: CHECK-NOT: pattern specified here

; CHECK-NOT: }

             ^

error: command failed with exit status: 1

On Tue, Mar 20, 2018 at 12:35 PM, Krzysztof Parzyszek via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: kparzysz
> Date: Tue Mar 20 12:35:09 2018
> New Revision: 328023
>
> URL: http://llvm.org/viewvc/llvm-project?rev=328023&view=rev
> Log:
> [Hexagon] Add a few more lit tests, NFC
>
> Added:
>     llvm/trunk/test/CodeGen/Hexagon/hexagon-cond-jumpr31.ll
>     llvm/trunk/test/CodeGen/Hexagon/jump-prob.ll
>     llvm/trunk/test/CodeGen/Hexagon/late_instr.ll
>     llvm/trunk/test/CodeGen/Hexagon/mlong-calls.ll
>     llvm/trunk/test/CodeGen/Hexagon/simplify64bitops_7223.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-change-deps.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-epilog-numphis.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi9.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-phi-ref.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-phi-start.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-rename.ll
>     llvm/trunk/test/CodeGen/Hexagon/swp-xxh2.ll
>     llvm/trunk/test/CodeGen/Hexagon/vect-downscale.ll
>
> Added: llvm/trunk/test/CodeGen/Hexagon/hexagon-cond-jumpr31.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/hexagon-cond-jumpr31.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/hexagon-cond-jumpr31.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/hexagon-cond-jumpr31.ll Tue Mar 20
> 12:35:09 2018
> @@ -0,0 +1,24 @@
> +; RUN: llc -march=hexagon -O3 < %s | FileCheck %s
> +; CHECK: if (!p{{[0-3]}}.new) jumpr:nt r31
> +; CHECK-NOT: .falign
> +
> + at g0 = common global i8 0, align 1
> + at g1 = common global i32 0, align 4
> +
> +define i32 @f0(i32* nocapture %a0) {
> +b0:
> +  %v0 = load i8, i8* @g0, align 1
> +  %v1 = icmp eq i8 %v0, 65
> +  br i1 %v1, label %b1, label %b2
> +
> +b1:                                               ; preds = %b0
> +  %v2 = load i32, i32* %a0, align 4
> +  %v3 = add nsw i32 %v2, 9
> +  %v4 = load i32, i32* @g1, align 4
> +  %v5 = sub i32 %v3, %v4
> +  store i32 %v5, i32* %a0, align 4
> +  br label %b2
> +
> +b2:                                               ; preds = %b1, %b0
> +  ret i32 undef
> +}
>
> Added: llvm/trunk/test/CodeGen/Hexagon/jump-prob.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/jump-prob.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/jump-prob.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/jump-prob.ll Tue Mar 20 12:35:09 2018
> @@ -0,0 +1,164 @@
> +; RUN: llc -march=hexagon < %s | FileCheck %s
> +
> +; CHECK: {
> +; CHECK: jump .LBB0_
> +; CHECK: r{{[0-9]+}} =
> +; CHECK: memw
> +; CHECK: }
> +
> +target triple = "hexagon-unknown--elf"
> +
> +%s.0 = type { i8, i8, i8, [6 x i32] }
> +%s.1 = type { %s.2 }
> +%s.2 = type { i32, i8* }
> +%s.3 = type <{ i8*, i8*, i16, i8, i8, i8 }>
> +
> + at g0 = internal global [2 x %s.0] [%s.0 { i8 0, i8 6, i8 7, [6 x i32]
> zeroinitializer }, %s.0 { i8 0, i8 6, i8 7, [6 x i32] zeroinitializer }],
> align 8
> + at g1 = internal constant [60 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", section "xxxxxxxxxxx.rodata.", align 4
> + at g2 = internal constant %s.1 { %s.2 { i32 24, i8* getelementptr inbounds
> ([60 x i8], [60 x i8]* @g1, i32 0, i32 0) } }, section
> ".rodata.xxxxxxxxxx.", align 4
> + at g3 = internal constant [115 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00",
> section "xxxxxxxxxxx.rodata.", align 4
> + at g4 = internal constant %s.3 <{ i8* getelementptr inbounds ([120 x i8],
> [120 x i8]* @g5, i32 0, i32 0), i8* getelementptr inbounds ([31 x i8], [31
> x i8]* @g6, i32 0, i32 0), i16 215, i8 4, i8 0, i8 1 }>, align 1
> + at g5 = private unnamed_addr constant [120 x i8] c"
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
> + at g6 = private unnamed_addr constant [31 x i8] c"
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
> + at g7 = internal constant %s.3 <{ i8* getelementptr inbounds ([120 x i8],
> [120 x i8]* @g5, i32 0, i32 0), i8* getelementptr inbounds ([91 x i8], [91
> x i8]* @g8, i32 0, i32 0), i16 225, i8 2, i8 2, i8 2 }>, align 1
> + at g8 = private unnamed_addr constant [91 x i8] c"
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
> + at g9 = internal constant %s.3 <{ i8* getelementptr inbounds ([120 x i8],
> [120 x i8]* @g5, i32 0, i32 0), i8* getelementptr inbounds ([109 x i8],
> [109 x i8]* @g10, i32 0, i32 0), i16 233, i8 2, i8 2, i8 4 }>, align 1
> + at g10 = private unnamed_addr constant [109 x i8] c"
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
> + at g11 = internal constant [116 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00",
> section "xxxxxxxxxxx.rodata.", align 4
> + at g12 = internal constant [134 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", section
> "xxxxxxxxxxx.rodata.", align 4
> + at g13 = internal constant %s.3 <{ i8* getelementptr inbounds ([120 x i8],
> [120 x i8]* @g5, i32 0, i32 0), i8* getelementptr inbounds ([31 x i8], [31
> x i8]* @g6, i32 0, i32 0), i16 264, i8 4, i8 0, i8 1 }>, align 1
> + at g14 = internal constant [116 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00",
> section "xxxxxxxxxxx.rodata.", align 4
> + at g15 = internal constant [134 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
> xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", section
> "xxxxxxxxxxx.rodata.", align 4
> +
> +; Function Attrs: nounwind
> +define zeroext i8 @f0(i8 zeroext %a0, i8 zeroext %a1, i8* nocapture %a2)
> #0 {
> +b0:
> +  store i8 -1, i8* %a2, align 1, !tbaa !0
> +  %v0 = zext i8 %a0 to i32
> +  %v1 = icmp ugt i8 %a0, 7
> +  %v2 = zext i8 %a1 to i32
> +  %v3 = icmp ugt i8 %a1, 5
> +  %v4 = or i1 %v1, %v3
> +  br i1 %v4, label %b1, label %b2
> +
> +b1:                                               ; preds = %b0
> +  tail call void @f1(%s.1* @g2, i32 2, i32 %v0, i32 %v2)
> +  br label %b12
> +
> +b2:                                               ; preds = %b0
> +  %v5 = load i8, i8* getelementptr inbounds ([2 x %s.0], [2 x %s.0]* @g0,
> i32 0, i32 0, i32 2), align 2, !tbaa !0
> +  %v6 = icmp eq i8 %v5, %a0
> +  %v7 = load i8, i8* getelementptr inbounds ([2 x %s.0], [2 x %s.0]* @g0,
> i32 0, i32 1, i32 2), align 2, !tbaa !0
> +  %v8 = icmp eq i8 %v7, %a0
> +  %v9 = and i1 %v6, %v8
> +  br i1 %v9, label %b3, label %b4
> +
> +b3:                                               ; preds = %b2
> +  %v10 = getelementptr inbounds [2 x %s.0], [2 x %s.0]* @g0, i32 0, i32
> 0, i32 3, i32 %v2
> +  %v11 = load i32, i32* %v10, align 4, !tbaa !3
> +  %v12 = getelementptr inbounds [2 x %s.0], [2 x %s.0]* @g0, i32 0, i32
> 1, i32 3, i32 %v2
> +  %v13 = load i32, i32* %v12, align 4, !tbaa !3
> +  tail call void @f1(%s.1* @g2, i32 2, i32 %v0, i32 %v2)
> +  br label %b12
> +
> +b4:                                               ; preds = %b2
> +  %v14 = load i8, i8* getelementptr inbounds ([2 x %s.0], [2 x %s.0]*
> @g0, i32 0, i32 0, i32 0), align 8, !tbaa !0
> +  %v15 = icmp eq i8 %v14, 1
> +  %v16 = and i1 %v15, %v6
> +  br i1 %v16, label %b5, label %b8
> +
> +b5:                                               ; preds = %b4
> +  store i8 0, i8* %a2, align 1, !tbaa !0
> +  %v17 = getelementptr inbounds [2 x %s.0], [2 x %s.0]* @g0, i32 0, i32
> 0, i32 3, i32 %v2
> +  %v18 = tail call i32 asm sideeffect "1:     $0 = memw_locked($2)\0A
>    $0 = add($0, $3)\0A       memw_locked($2, p0) = $0\0A       if !p0 jump
> 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* %v17, i32* %v17, i32 1, i32* %v17) #0,
> !srcloc !5
> +  %v19 = load i32, i32* %v17, align 4, !tbaa !3
> +  %v20 = icmp eq i32 %v19, 255
> +  br i1 %v20, label %b6, label %b7
> +
> +b6:                                               ; preds = %b5
> +  tail call void @f2(%s.3* @g4, i32 %v2) #2
> +  unreachable
> +
> +b7:                                               ; preds = %b5
> +  store i8 %a1, i8* getelementptr inbounds ([2 x %s.0], [2 x %s.0]* @g0,
> i32 0, i32 0, i32 1), align 1, !tbaa !0
> +  %v21 = load i8, i8* %a2, align 1, !tbaa !0
> +  %v22 = zext i8 %v21 to i32
> +  tail call void @f3(%s.3* @g7, i32 %v2, i32 %v22) #0
> +  %v23 = load i32, i32* bitcast ([2 x %s.0]* @g0 to i32*), align 8
> +  %v24 = and i32 %v23, 255
> +  %v25 = lshr i32 %v23, 8
> +  %v26 = and i32 %v25, 255
> +  %v27 = lshr i32 %v23, 16
> +  %v28 = and i32 %v27, 255
> +  %v29 = load i32, i32* %v17, align 4, !tbaa !3
> +  tail call void @f4(%s.3* @g9, i32 %v24, i32 %v26, i32 %v28, i32 %v29) #0
> +  %v30 = load i8, i8* %a2, align 1, !tbaa !0
> +  %v31 = zext i8 %v30 to i32
> +  tail call void @f1(%s.1* @g2, i32 2, i32 %v0, i32 %v2)
> +  %v32 = load i32, i32* bitcast ([2 x %s.0]* @g0 to i32*), align 8
> +  %v33 = and i32 %v32, 255
> +  %v34 = lshr i32 %v32, 8
> +  %v35 = and i32 %v34, 255
> +  %v36 = lshr i32 %v32, 16
> +  %v37 = and i32 %v36, 255
> +  %v38 = load i32, i32* %v17, align 4, !tbaa !3
> +  tail call void @f1(%s.1* @g2, i32 2, i32 %v0, i32 %v2)
> +  br label %b12
> +
> +b8:                                               ; preds = %b4
> +  %v39 = load i8, i8* getelementptr inbounds ([2 x %s.0], [2 x %s.0]*
> @g0, i32 0, i32 1, i32 0), align 4, !tbaa !0
> +  %v40 = icmp eq i8 %v39, 1
> +  %v41 = and i1 %v40, %v8
> +  br i1 %v41, label %b9, label %b12
> +
> +b9:                                               ; preds = %b8
> +  store i8 1, i8* %a2, align 1, !tbaa !0
> +  %v42 = getelementptr inbounds [2 x %s.0], [2 x %s.0]* @g0, i32 0, i32
> 1, i32 3, i32 %v2
> +  %v43 = tail call i32 asm sideeffect "1:     $0 = memw_locked($2)\0A
>    $0 = add($0, $3)\0A       memw_locked($2, p0) = $0\0A       if !p0 jump
> 1b\0A", "=&r,=*m,r,r,*m,~{p0}"(i32* %v42, i32* %v42, i32 1, i32* %v42) #0,
> !srcloc !5
> +  %v44 = load i32, i32* %v42, align 4, !tbaa !3
> +  %v45 = icmp eq i32 %v44, 255
> +  br i1 %v45, label %b10, label %b11
> +
> +b10:                                              ; preds = %b9
> +  tail call void @f2(%s.3* @g13, i32 %v2) #2
> +  unreachable
> +
> +b11:                                              ; preds = %b9
> +  store i8 %a1, i8* getelementptr inbounds ([2 x %s.0], [2 x %s.0]* @g0,
> i32 0, i32 1, i32 1), align 1, !tbaa !0
> +  %v46 = load i8, i8* %a2, align 1, !tbaa !0
> +  %v47 = zext i8 %v46 to i32
> +  tail call void @f1(%s.1* @g2, i32 2, i32 %v0, i32 %v2)
> +  %v48 = load i32, i32* bitcast (i8* getelementptr inbounds ([2 x %s.0],
> [2 x %s.0]* @g0, i32 0, i32 1, i32 0) to i32*), align 4
> +  %v49 = and i32 %v48, 255
> +  %v50 = lshr i32 %v48, 8
> +  %v51 = and i32 %v50, 255
> +  %v52 = lshr i32 %v48, 16
> +  %v53 = and i32 %v52, 255
> +  %v54 = load i32, i32* %v42, align 4, !tbaa !3
> +  tail call void @f1(%s.1* @g2, i32 2, i32 %v0, i32 %v2)
> +  br label %b12
> +
> +b12:                                              ; preds = %b11, %b8,
> %b7, %b3, %b1
> +  %v55 = phi i8 [ 0, %b1 ], [ 0, %b3 ], [ 1, %b7 ], [ 1, %b11 ], [ 0, %b8
> ]
> +  ret i8 %v55
> +}
> +
> +declare void @f1(%s.1*, i32, i32, i32)
> +
> +; Function Attrs: noreturn
> +declare void @f2(%s.3*, i32) #1
> +
> +declare void @f3(%s.3*, i32, i32)
> +
> +declare void @f4(%s.3*, i32, i32, i32, i32)
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv55" }
> +attributes #1 = { noreturn }
> +attributes #2 = { noreturn nounwind }
> +
> +!0 = !{!1, !1, i64 0}
> +!1 = !{!"omnipotent char", !2}
> +!2 = !{!"Simple C/C++ TBAA"}
> +!3 = !{!4, !4, i64 0}
> +!4 = !{!"long", !1}
> +!5 = !{i32 86170, i32 86211, i32 86247, i32 86291}
>
> Added: llvm/trunk/test/CodeGen/Hexagon/late_instr.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/late_instr.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/late_instr.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/late_instr.ll Tue Mar 20 12:35:09 2018
> @@ -0,0 +1,231 @@
> +; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s
> +
> +; Check if instruction vandqrt.acc and its predecessor are scheduled in
> consecutive packets.
> +; CHECK: or(q{{[0-3]+}},q{{[0-3]+}})
> +; CHECK: }
> +; CHECK-NOT: }
> +; CHECK: |= vand(q{{[0-3]+}},r{{[0-9]+}})
> +; CHECK: endloop0
> +
> +target triple = "hexagon-unknown-linux-gnu"
> +
> +; Function Attrs: nounwind
> +define void @f0(i8* noalias nocapture readonly %a0, i32 %a1, i32 %a2, i32
> %a3, i32* noalias nocapture %a4, i32 %a5) #0 {
> +b0:
> +  %v0 = mul i32 %a2, 3
> +  %v1 = bitcast i32* %a4 to <16 x i32>*
> +  %v2 = mul i32 %a5, -2
> +  %v3 = add i32 %v2, %a1
> +  %v4 = and i32 %a5, 63
> +  %v5 = add i32 %v3, %v4
> +  %v6 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
> +  %v7 = lshr i32 %v5, 6
> +  %v8 = and i32 %v7, 7
> +  %v9 = and i32 %v5, 511
> +  %v10 = icmp eq i32 %v9, 0
> +  %v11 = shl i32 -1, %v8
> +  %v12 = select i1 %v10, i32 0, i32 %v11
> +  %v13 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v12)
> +  %v14 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v13)
> +  %v15 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v14)
> +  %v16 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v5)
> +  %v17 = shl i32 1, %v8
> +  %v18 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %v17)
> +  %v19 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>
> %v15, <512 x i1> %v16, i32 %v18)
> +  %v20 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %a3)
> +  %v21 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v20)
> +  %v22 = icmp sgt i32 %v5, 0
> +  br i1 %v22, label %b1, label %b8
> +
> +b1:                                               ; preds = %b0
> +  %v23 = getelementptr inbounds i8, i8* %a0, i32 %a5
> +  %v24 = bitcast i8* %v23 to <16 x i32>*
> +  %v25 = load <16 x i32>, <16 x i32>* %v24, align 64, !tbaa !0
> +  %v26 = add i32 %a5, 64
> +  %v27 = getelementptr inbounds i8, i8* %a0, i32 %v26
> +  %v28 = bitcast i8* %v27 to <16 x i32>*
> +  %v29 = add i32 %a5, -64
> +  %v30 = getelementptr inbounds i8, i8* %a0, i32 %v29
> +  %v31 = bitcast i8* %v30 to <16 x i32>*
> +  %v32 = load <16 x i32>, <16 x i32>* %v31, align 64, !tbaa !0
> +  %v33 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %a5)
> +  %v34 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %v33,
> i32 16843009)
> +  %v35 = tail call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %v34)
> +  %v36 = add i32 %v0, %a5
> +  %v37 = getelementptr inbounds i8, i8* %a0, i32 %v36
> +  %v38 = bitcast i8* %v37 to <16 x i32>*
> +  %v39 = sub i32 %a5, %v0
> +  %v40 = getelementptr inbounds i8, i8* %a0, i32 %v39
> +  %v41 = bitcast i8* %v40 to <16 x i32>*
> +  %v42 = tail call <16 x i32> @llvm.hexagon.V6.vd0()
> +  %v43 = add i32 %v4, %a1
> +  %v44 = mul i32 %a5, 2
> +  %v45 = sub i32 %v43, %v44
> +  %v46 = xor i32 %v45, -1
> +  %v47 = icmp sgt i32 %v46, -513
> +  %v48 = select i1 %v47, i32 %v46, i32 -513
> +  %v49 = add i32 %v48, %a1
> +  %v50 = add i32 %v49, %v4
> +  %v51 = add i32 %v50, 512
> +  %v52 = sub i32 %v51, %v44
> +  %v53 = lshr i32 %v52, 9
> +  %v54 = mul nuw nsw i32 %v53, 16
> +  %v55 = add nuw nsw i32 %v54, 16
> +  %v56 = getelementptr i32, i32* %a4, i32 %v55
> +  br label %b2
> +
> +b2:                                               ; preds = %b6, %b1
> +  %v57 = phi i32 [ %v46, %b1 ], [ %v125, %b6 ]
> +  %v58 = phi i32 [ %v5, %b1 ], [ %v123, %b6 ]
> +  %v59 = phi <16 x i32>* [ %v1, %b1 ], [ %v122, %b6 ]
> +  %v60 = phi <16 x i32>* [ %v38, %b1 ], [ %v114, %b6 ]
> +  %v61 = phi <16 x i32>* [ %v41, %b1 ], [ %v115, %b6 ]
> +  %v62 = phi <16 x i32>* [ %v28, %b1 ], [ %v116, %b6 ]
> +  %v63 = phi i32 [ 512, %b1 ], [ %v69, %b6 ]
> +  %v64 = phi i32 [ -2139062144, %b1 ], [ %v117, %b6 ]
> +  %v65 = phi <16 x i32> [ %v32, %b1 ], [ %v118, %b6 ]
> +  %v66 = phi <16 x i32> [ %v25, %b1 ], [ %v119, %b6 ]
> +  %v67 = phi <16 x i32> [ %v35, %b1 ], [ %v6, %b6 ]
> +  %v68 = icmp slt i32 %v58, %v63
> +  %v69 = select i1 %v68, i32 %v58, i32 %v63
> +  %v70 = icmp sgt i32 %v69, 0
> +  br i1 %v70, label %b3, label %b6
> +
> +b3:                                               ; preds = %b2
> +  %v71 = xor i32 %v63, -1
> +  %v72 = icmp sgt i32 %v57, %v71
> +  %v73 = select i1 %v72, i32 %v57, i32 %v71
> +  %v74 = icmp sgt i32 %v73, -65
> +  %v75 = add i32 %v73, 63
> +  %v76 = select i1 %v74, i32 %v75, i32 -2
> +  %v77 = sub i32 %v76, %v73
> +  %v78 = lshr i32 %v77, 6
> +  br label %b4
> +
> +b4:                                               ; preds = %b4, %b3
> +  %v79 = phi i32 [ %v69, %b3 ], [ %v108, %b4 ]
> +  %v80 = phi <16 x i32>* [ %v60, %b3 ], [ %v89, %b4 ]
> +  %v81 = phi <16 x i32>* [ %v61, %b3 ], [ %v87, %b4 ]
> +  %v82 = phi <16 x i32>* [ %v62, %b3 ], [ %v92, %b4 ]
> +  %v83 = phi i32 [ %v64, %b3 ], [ %v106, %b4 ]
> +  %v84 = phi <16 x i32> [ %v65, %b3 ], [ %v85, %b4 ]
> +  %v85 = phi <16 x i32> [ %v66, %b3 ], [ %v93, %b4 ]
> +  %v86 = phi <16 x i32> [ %v42, %b3 ], [ %v107, %b4 ]
> +  %v87 = getelementptr inbounds <16 x i32>, <16 x i32>* %v81, i32 1
> +  %v88 = load <16 x i32>, <16 x i32>* %v81, align 64, !tbaa !0
> +  %v89 = getelementptr inbounds <16 x i32>, <16 x i32>* %v80, i32 1
> +  %v90 = load <16 x i32>, <16 x i32>* %v80, align 64, !tbaa !0
> +  %v91 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v85,
> <16 x i32> %v84, i32 3)
> +  %v92 = getelementptr inbounds <16 x i32>, <16 x i32>* %v82, i32 1
> +  %v93 = load <16 x i32>, <16 x i32>* %v82, align 64, !tbaa !0
> +  %v94 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v93,
> <16 x i32> %v85, i32 3)
> +  %v95 = tail call <16 x i32> @llvm.hexagon.V6.vsububsat(<16 x i32> %v85,
> <16 x i32> %v21)
> +  %v96 = tail call <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32> %v85,
> <16 x i32> %v21)
> +  %v97 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v88,
> <16 x i32> %v90)
> +  %v98 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v88,
> <16 x i32> %v90)
> +  %v99 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v94,
> <16 x i32> %v91)
> +  %v100 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v94,
> <16 x i32> %v91)
> +  %v101 = tail call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %v97,
> <16 x i32> %v99)
> +  %v102 = tail call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %v98,
> <16 x i32> %v100)
> +  %v103 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v101,
> <16 x i32> %v96)
> +  %v104 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v95,
> <16 x i32> %v102)
> +  %v105 = tail call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %v103,
> <512 x i1> %v104)
> +  %v106 = tail call i32 @llvm.hexagon.S6.rol.i.r(i32 %v83, i32 1)
> +  %v107 = tail call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>
> %v86, <512 x i1> %v105, i32 %v106)
> +  %v108 = add nsw i32 %v79, -64
> +  %v109 = icmp sgt i32 %v79, 64
> +  br i1 %v109, label %b4, label %b5
> +
> +b5:                                               ; preds = %b4
> +  %v110 = add nuw nsw i32 %v78, 1
> +  %v111 = getelementptr <16 x i32>, <16 x i32>* %v62, i32 %v110
> +  %v112 = getelementptr <16 x i32>, <16 x i32>* %v60, i32 %v110
> +  %v113 = getelementptr <16 x i32>, <16 x i32>* %v61, i32 %v110
> +  br label %b6
> +
> +b6:                                               ; preds = %b5, %b2
> +  %v114 = phi <16 x i32>* [ %v112, %b5 ], [ %v60, %b2 ]
> +  %v115 = phi <16 x i32>* [ %v113, %b5 ], [ %v61, %b2 ]
> +  %v116 = phi <16 x i32>* [ %v111, %b5 ], [ %v62, %b2 ]
> +  %v117 = phi i32 [ %v106, %b5 ], [ %v64, %b2 ]
> +  %v118 = phi <16 x i32> [ %v85, %b5 ], [ %v65, %b2 ]
> +  %v119 = phi <16 x i32> [ %v93, %b5 ], [ %v66, %b2 ]
> +  %v120 = phi <16 x i32> [ %v107, %b5 ], [ %v42, %b2 ]
> +  %v121 = tail call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %v120,
> <16 x i32> %v67)
> +  %v122 = getelementptr inbounds <16 x i32>, <16 x i32>* %v59, i32 1
> +  store <16 x i32> %v121, <16 x i32>* %v59, align 64, !tbaa !0
> +  %v123 = add nsw i32 %v58, -512
> +  %v124 = icmp sgt i32 %v58, 512
> +  %v125 = add i32 %v57, 512
> +  br i1 %v124, label %b2, label %b7
> +
> +b7:                                               ; preds = %b6
> +  %v126 = bitcast i32* %v56 to <16 x i32>*
> +  br label %b8
> +
> +b8:                                               ; preds = %b7, %b0
> +  %v127 = phi <16 x i32>* [ %v126, %b7 ], [ %v1, %b0 ]
> +  %v128 = getelementptr inbounds <16 x i32>, <16 x i32>* %v127, i32 -1
> +  %v129 = load <16 x i32>, <16 x i32>* %v128, align 64, !tbaa !0
> +  %v130 = tail call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %v129,
> <16 x i32> %v19)
> +  store <16 x i32> %v130, <16 x i32>* %v128, align 64, !tbaa !0
> +  ret void
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.S2.vsplatrb(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>,
> i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vd0() #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>,
> i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32)
> #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vsububsat(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.S6.rol.i.r(i32, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv60"
> "target-features"="+hvxv60,+hvx-length64b" }
> +attributes #1 = { nounwind readnone }
> +
> +!0 = !{!1, !1, i64 0}
> +!1 = !{!"omnipotent char", !2, i64 0}
> +!2 = !{!"Simple C/C++ TBAA"}
>
> Added: llvm/trunk/test/CodeGen/Hexagon/mlong-calls.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/mlong-calls.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/mlong-calls.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/mlong-calls.ll Tue Mar 20 12:35:09
> 2018
> @@ -0,0 +1,41 @@
> +; RUN: llc -hexagon-long-calls -march=hexagon
> -enable-save-restore-long=true < %s | FileCheck %s
> +
> +; CHECK: call ##f1
> +; CHECK: jump ##__restore
> +
> +; Function Attrs: minsize nounwind
> +define i64 @f0(i32 %a0, i32 %a1) #0 {
> +b0:
> +  %v0 = add nsw i32 %a0, 5
> +  %v1 = tail call i64 @f1(i32 %v0) #1
> +  %v2 = sext i32 %a1 to i64
> +  %v3 = add nsw i64 %v1, %v2
> +  ret i64 %v3
> +}
> +
> +; Function Attrs: minsize nounwind
> +declare i64 @f1(i32) #0
> +
> +; Function Attrs: nounwind
> +define i64 @f2(i32 %a0, i32 %a1) #1 {
> +b0:
> +  %v0 = add nsw i32 %a0, 5
> +  %v1 = tail call i64 @f1(i32 %v0) #1
> +  ret i64 %v1
> +}
> +
> +; Function Attrs: noreturn nounwind
> +define i64 @f3(i32 %a0, i32 %a1) #2 {
> +b0:
> +  %v0 = add nsw i32 %a0, 5
> +  %v1 = tail call i64 @f4(i32 %v0) #2
> +  unreachable
> +}
> +
> +; Function Attrs: noreturn
> +declare i64 @f4(i32) #3
> +
> +attributes #0 = { minsize nounwind }
> +attributes #1 = { nounwind }
> +attributes #2 = { noreturn nounwind }
> +attributes #3 = { noreturn }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/simplify64bitops_7223.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/
> simplify64bitops_7223.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/simplify64bitops_7223.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/simplify64bitops_7223.ll Tue Mar 20
> 12:35:09 2018
> @@ -0,0 +1,61 @@
> +; RUN: llc -march=hexagon -enable-pipeliner=false < %s | FileCheck %s
> +; RUN: llc -march=hexagon -enable-pipeliner < %s
> +; REQUIRES: asserts
> +; CHECK-NOT: and(
> +; CHECK-NOT: or(
> +; CHECK-NOT: combine(0
> +; CHECK: add
> +; CHECK: add(
> +; CHECK-NEXT: memuh(
> +; CHECK-NEXT: endloop
> +
> +%s.22 = type { i64 }
> +
> + at g0 = common global i32 0, align 4
> +
> +; Function Attrs: nounwind
> +define i64 @f0(%s.22* nocapture %a0, i32 %a1) #0 {
> +b0:
> +  %v0 = bitcast %s.22* %a0 to i16*
> +  %v1 = load i16, i16* %v0, align 2, !tbaa !0
> +  %v2 = zext i16 %v1 to i64
> +  %v3 = icmp sgt i32 %a1, 0
> +  br i1 %v3, label %b1, label %b4
> +
> +b1:                                               ; preds = %b0
> +  br label %b2
> +
> +b2:                                               ; preds = %b2, %b1
> +  %v4 = phi i16* [ %v8, %b2 ], [ %v0, %b1 ]
> +  %v5 = phi i32 [ %v10, %b2 ], [ undef, %b1 ]
> +  %v6 = phi i32 [ %v15, %b2 ], [ 0, %b1 ]
> +  %v7 = phi i64 [ %v14, %b2 ], [ %v2, %b1 ]
> +  %v8 = getelementptr inbounds i16, i16* %v4, i32 1
> +  %v9 = trunc i64 %v7 to i32
> +  %v10 = add i32 %v5, %v9
> +  %v11 = load i16, i16* %v8, align 2, !tbaa !0
> +  %v12 = zext i16 %v11 to i64
> +  %v13 = and i64 %v7, -4294967296
> +  %v14 = or i64 %v12, %v13
> +  %v15 = add nsw i32 %v6, 1
> +  %v16 = icmp eq i32 %v15, %a1
> +  br i1 %v16, label %b3, label %b2
> +
> +b3:                                               ; preds = %b2
> +  br label %b4
> +
> +b4:                                               ; preds = %b3, %b0
> +  %v17 = phi i32 [ undef, %b0 ], [ %v10, %b3 ]
> +  %v18 = phi i64 [ %v2, %b0 ], [ %v14, %b3 ]
> +  store volatile i32 %v17, i32* @g0, align 4, !tbaa !4
> +  ret i64 %v18
> +}
> +
> +attributes #0 = { nounwind }
> +
> +!0 = !{!1, !1, i64 0}
> +!1 = !{!"short", !2}
> +!2 = !{!"omnipotent char", !3}
> +!3 = !{!"Simple C/C++ TBAA"}
> +!4 = !{!5, !5, i64 0}
> +!5 = !{!"long", !2}
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-carried-1.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-carried-1.ll Tue Mar 20 12:35:09
> 2018
> @@ -0,0 +1,62 @@
> +; RUN: llc -march=hexagon -rdf-opt=0 -disable-hexagon-misched < %s |
> FileCheck %s
> +
> +; Test that we generate the correct code when a loop carried value
> +; is scheduled one stage earlier than its use. The code in
> +; isLoopCarried was returning false in this case, and the generated
> +; code was missing a copy.
> +
> +; CHECK: loop0(.LBB0_[[LOOP:.]],
> +; CHECK: .LBB0_[[LOOP]]:
> +; CHECK: += mpy([[REG0:(r[0-9]+)]],r{{[0-9]+}})
> +; CHECK: [[REG0]] = r{{[0-9]+}}
> +; CHECK-NOT: [[REG0]] = memw
> +; CHECK: endloop0
> +
> + at g0 = external global [256 x i32], align 8
> +
> +define void @f0() #0 {
> +b0:
> +  br label %b1
> +
> +b1:                                               ; preds = %b1, %b0
> +  br i1 undef, label %b2, label %b1
> +
> +b2:                                               ; preds = %b1
> +  br label %b3
> +
> +b3:                                               ; preds = %b3, %b2
> +  %v0 = phi i32* [ getelementptr inbounds ([256 x i32], [256 x i32]* @g0,
> i32 0, i32 0), %b2 ], [ %v1, %b3 ]
> +  %v1 = getelementptr i32, i32* %v0, i32 6
> +  br i1 undef, label %b4, label %b3
> +
> +b4:                                               ; preds = %b3
> +  br i1 undef, label %b6, label %b5
> +
> +b5:                                               ; preds = %b5, %b4
> +  %v2 = phi i64 [ %v19, %b5 ], [ undef, %b4 ]
> +  %v3 = phi i32* [ %v8, %b5 ], [ %v1, %b4 ]
> +  %v4 = phi i32 [ %v9, %b5 ], [ undef, %b4 ]
> +  %v5 = phi i32 [ %v11, %b5 ], [ undef, %b4 ]
> +  %v6 = phi i32 [ %v5, %b5 ], [ undef, %b4 ]
> +  %v7 = phi i32 [ %v10, %b5 ], [ 0, %b4 ]
> +  %v8 = getelementptr i32, i32* %v3, i32 1
> +  %v9 = add nsw i32 %v4, 1
> +  %v10 = load i32, i32* %v8, align 4
> +  %v11 = load i32, i32* null, align 4
> +  %v12 = sext i32 %v6 to i64
> +  %v13 = sext i32 %v10 to i64
> +  %v14 = sext i32 %v7 to i64
> +  %v15 = mul nsw i64 %v14, %v12
> +  %v16 = add i64 %v12, %v2
> +  %v17 = add i64 %v16, %v13
> +  %v18 = add i64 %v17, 0
> +  %v19 = add i64 %v18, %v15
> +  %v20 = icmp eq i32 %v9, 128
> +  br i1 %v20, label %b6, label %b5
> +
> +b6:                                               ; preds = %b5, %b4
> +  %v21 = phi i64 [ undef, %b4 ], [ %v19, %b5 ]
> +  unreachable
> +}
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv62" }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-change-deps.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-change-deps.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-change-deps.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-change-deps.ll Tue Mar 20
> 12:35:09 2018
> @@ -0,0 +1,61 @@
> +; RUN: llc -march=hexagon < %s | FileCheck %s
> +
> +; Test that we generate the correct offsets for loads in the prolog
> +; after removing dependences on post-increment instructions of the
> +; base register.
> +
> +; CHECK: memh([[REG0:(r[0-9]+)]]+#0)
> +; CHECK: memh([[REG0]]+#2)
> +; CHECK: loop0
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.A2.sath(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.A2.asrh(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.A2.addsat(i32, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.M2.mpy.sat.ll.s1(i32, i32) #1
> +
> +define void @f0() #0 align 2 {
> +b0:
> +  br label %b1
> +
> +b1:                                               ; preds = %b0
> +  br label %b2
> +
> +b2:                                               ; preds = %b2, %b1
> +  %v0 = phi i16* [ undef, %b1 ], [ %v14, %b2 ]
> +  %v1 = phi i32 [ 0, %b1 ], [ %v12, %b2 ]
> +  %v2 = load i16, i16* %v0, align 2
> +  %v3 = sext i16 %v2 to i32
> +  %v4 = call i32 @llvm.hexagon.M2.mpy.sat.ll.s1(i32 undef, i32 %v3)
> +  %v5 = call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %v4, i32 undef)
> +  %v6 = call i32 @llvm.hexagon.A2.addsat(i32 %v5, i32 32768)
> +  %v7 = call i32 @llvm.hexagon.A2.asrh(i32 %v6)
> +  %v8 = call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %v7, i32 undef)
> +  %v9 = call i32 @llvm.hexagon.A2.sath(i32 %v8)
> +  %v10 = trunc i32 %v9 to i16
> +  store i16 %v10, i16* null, align 2
> +  %v11 = trunc i32 %v7 to i16
> +  store i16 %v11, i16* %v0, align 2
> +  %v12 = add nsw i32 %v1, 1
> +  %v13 = icmp slt i32 %v12, undef
> +  %v14 = getelementptr i16, i16* %v0, i32 1
> +  br i1 %v13, label %b2, label %b3
> +
> +b3:                                               ; preds = %b2
> +  unreachable
> +
> +b4:                                               ; No predecessors!
> +  unreachable
> +}
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv55" }
> +attributes #1 = { nounwind readnone }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-epilog-numphis.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-epilog-numphis.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-epilog-numphis.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-epilog-numphis.ll Tue Mar 20
> 12:35:09 2018
> @@ -0,0 +1,82 @@
> +; RUN: llc -march=hexagon < %s | FileCheck %s
> +
> +; CHECK: endloop0
> +; CHECK: vmem
> +; CHECK: vmem([[REG:r([0-9]+)]]+#1) =
> +; CHECK: vmem([[REG]]+#0) =
> +
> +define void @f0(i32 %a0) local_unnamed_addr #0 {
> +b0:
> +  br label %b1
> +
> +b1:                                               ; preds = %b1, %b0
> +  %v0 = phi i32 [ %v33, %b1 ], [ %a0, %b0 ]
> +  %v1 = phi <16 x i32>* [ %v32, %b1 ], [ undef, %b0 ]
> +  %v2 = phi <16 x i32>* [ %v23, %b1 ], [ undef, %b0 ]
> +  %v3 = phi <16 x i32>* [ %v10, %b1 ], [ undef, %b0 ]
> +  %v4 = phi <16 x i32>* [ %v8, %b1 ], [ null, %b0 ]
> +  %v5 = phi <32 x i32> [ %v12, %b1 ], [ undef, %b0 ]
> +  %v6 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v5)
> +  %v7 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v6,
> <16 x i32> undef, i32 6)
> +  %v8 = getelementptr inbounds <16 x i32>, <16 x i32>* %v4, i32 1
> +  %v9 = load <16 x i32>, <16 x i32>* %v4, align 64
> +  %v10 = getelementptr inbounds <16 x i32>, <16 x i32>* %v3, i32 1
> +  %v11 = load <16 x i32>, <16 x i32>* %v3, align 64
> +  %v12 = tail call <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32> %v11,
> <16 x i32> %v9)
> +  %v13 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v12)
> +  %v14 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v13, <16
> x i32> undef)
> +  %v15 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v14,
> <16 x i32> undef, i32 4)
> +  %v16 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v14, <16
> x i32> %v15)
> +  %v17 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v14,
> <16 x i32> undef, i32 4)
> +  %v18 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v16,
> <16 x i32> undef, i32 2)
> +  %v19 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> undef,
> <16 x i32> %v17)
> +  %v20 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v18, <16
> x i32> %v19)
> +  %v21 = getelementptr inbounds <16 x i32>, <16 x i32>* %v2, i32 1
> +  %v22 = load <16 x i32>, <16 x i32>* %v2, align 64
> +  %v23 = getelementptr inbounds <16 x i32>, <16 x i32>* %v2, i32 2
> +  %v24 = load <16 x i32>, <16 x i32>* %v21, align 64
> +  %v25 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v22, <16
> x i32> %v7)
> +  %v26 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v24, <16
> x i32> undef)
> +  %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v25, <16
> x i32> %v20)
> +  %v28 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v26, <16
> x i32> %v20)
> +  store <16 x i32> %v27, <16 x i32>* %v2, align 64
> +  store <16 x i32> %v28, <16 x i32>* %v21, align 64
> +  %v29 = tail call <16 x i32> @llvm.hexagon.V6.vmpyhsrs(<16 x i32> %v27,
> i32 17760527)
> +  %v30 = tail call <16 x i32> @llvm.hexagon.V6.vmpyhsrs(<16 x i32> %v28,
> i32 17760527)
> +  %v31 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %v30,
> <16 x i32> %v29)
> +  %v32 = getelementptr inbounds <16 x i32>, <16 x i32>* %v1, i32 1
> +  store <16 x i32> %v31, <16 x i32>* %v1, align 64
> +  %v33 = add nsw i32 %v0, -64
> +  %v34 = icmp sgt i32 %v0, 192
> +  br i1 %v34, label %b1, label %b2
> +
> +b2:                                               ; preds = %b1
> +  unreachable
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>,
> i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32)
> #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vmpyhsrs(<16 x i32>, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv65"
> "target-features"="+hvxv65,+hvx-length64b" }
> +attributes #1 = { nounwind readnone }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi9.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-epilog-phi9.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi9.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-epilog-phi9.ll Tue Mar 20
> 12:35:09 2018
> @@ -0,0 +1,55 @@
> +; RUN: llc -march=hexagon < %s | FileCheck %s
> +
> +; Test that we generate the correct Phi name in the last couple of epilog
> +; blocks, when there are 3 epilog blocks. The Phi was scheduled in stage
> +; 2, so the computation for the number of Phis needs to be adjusted when
> +; the incoming prolog block is from prolog 0 or prolog 1.
> +; Note: the pipeliner no longer generates a 3-stage pipeline for this test.
> +
> +; CHECK: loop0
> +; CHECK: [[REG0:r([0-9]+)]] = add(r{{[0-8]+}},#8)
> +; CHECK: endloop0
> +; CHECK: [[REG0]] = add(r{{[0-9]+}},#8)
> +
> +; Function Attrs: nounwind
> +define void @f0(i16* nocapture readonly %a0) #0 {
> +b0:
> +  %v0 = alloca [129 x i32], align 8
> +  br i1 undef, label %b1, label %b3
> +
> +b1:                                               ; preds = %b0
> +  br label %b2
> +
> +b2:                                               ; preds = %b2, %b1
> +  %v1 = phi i16* [ %a0, %b1 ], [ %v2, %b2 ]
> +  %v2 = phi i16* [ undef, %b1 ], [ %v15, %b2 ]
> +  %v3 = phi i32* [ null, %b1 ], [ %v4, %b2 ]
> +  %v4 = phi i32* [ null, %b1 ], [ %v14, %b2 ]
> +  %v5 = phi i32 [ 0, %b1 ], [ %v13, %b2 ]
> +  %v6 = phi i16* [ undef, %b1 ], [ %v12, %b2 ]
> +  %v7 = load i16, i16* %v2, align 2
> +  %v8 = sext i16 %v7 to i32
> +  %v9 = call i32 @llvm.hexagon.M2.mpy.ll.s0(i32 %v8, i32 %v8) #2
> +  %v10 = load i16, i16* %v6, align 2
> +  %v11 = call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32 %v9, i32 undef,
> i32 undef) #2
> +  store i32 %v11, i32* %v4, align 4
> +  %v12 = getelementptr inbounds i16, i16* %v6, i32 -1
> +  %v13 = add i32 %v5, 1
> +  %v14 = getelementptr inbounds i32, i32* %v3, i32 2
> +  %v15 = getelementptr inbounds i16, i16* %v1, i32 2
> +  %v16 = icmp slt i32 %v13, undef
> +  br i1 %v16, label %b2, label %b3
> +
> +b3:                                               ; preds = %b2, %b0
> +  unreachable
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32) #1
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv60" }
> +attributes #1 = { nounwind readnone }
> +attributes #2 = { nounwind }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-phi-ref.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-phi-ref.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-phi-ref.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-phi-ref.ll Tue Mar 20 12:35:09
> 2018
> @@ -0,0 +1,45 @@
> +; RUN: llc -march=hexagon -enable-pipeliner -enable-bsb-sched=0
> -join-liveintervals=false < %s | FileCheck %s
> +
> +; Test that we generate the correct Phi values when there is a Phi that
> +; references another Phi. We need to examine the other Phi to get the
> +; correct value. We need to do this even if we haven't generated the
> +; kernel code for the other Phi yet.
> +
> +; CHECK: [[REG0:(v[0-9]+)]] = [[REG1:(v[0-9]+)]]
> +; CHECK: loop0
> +; CHECK: [[REG0]] = [[REG1]]
> +; CHECK: endloop0
> +
> +; Function Attrs: nounwind
> +define void @f0() #0 {
> +b0:
> +  br i1 undef, label %b1, label %b2
> +
> +b1:                                               ; preds = %b1, %b0
> +  %v0 = phi i32 [ %v7, %b1 ], [ 0, %b0 ]
> +  %v1 = phi <16 x i32> [ %v4, %b1 ], [ undef, %b0 ]
> +  %v2 = phi <16 x i32> [ %v1, %b1 ], [ undef, %b0 ]
> +  %v3 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v1, <16
> x i32> %v2, i32 62)
> +  %v4 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> undef, <16
> x i32> undef)
> +  %v5 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v4, <16
> x i32> %v1, i32 2)
> +  %v6 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32> %v3,
> <16 x i32> %v5)
> +  store <16 x i32> %v6, <16 x i32>* null, align 64
> +  %v7 = add nsw i32 %v0, 1
> +  %v8 = icmp slt i32 %v7, undef
> +  br i1 %v8, label %b1, label %b2
> +
> +b2:                                               ; preds = %b1, %b0
> +  ret void
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32)
> #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32>, <16 x i32>) #1
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv60"
> "target-features"="+hvxv60,+hvx-length64b" }
> +attributes #1 = { nounwind readnone }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-phi-start.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-phi-start.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-phi-start.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-phi-start.ll Tue Mar 20 12:35:09
> 2018
> @@ -0,0 +1,44 @@
> +; RUN: llc -march=hexagon -enable-pipeliner -pipeliner-max-stages=2
> -disable-packetizer < %s | FileCheck %s
> +
> +; Test that the early start and late start values are computed correctly
> +; when a Phi depends on another Phi. In this case, they should occur in
> +; the same stage.
> +
> +; CHECK-DAG: [[REG3:(r[0-9]+)]] = add([[REG1:(r[0-9]+)]],#-1)
> +; CHECK-DAG: [[REG2:(r[0-9]+)]] = add([[REG1]],#-1)
> +; CHECK-DAG: loop0(.LBB0_[[LOOP:.]],[[REG3]])
> +; CHECK-NOT: = [[REG2]]
> +; CHECK: .LBB0_[[LOOP]]:
> +; CHECK: }{{[ \t]*}}:endloop
> +
> +; Function Attrs: nounwind
> +define void @f0(i32 %a0, i16* nocapture %a1) #0 {
> +b0:
> +  br i1 undef, label %b1, label %b2
> +
> +b1:                                               ; preds = %b0
> +  %v0 = add nsw i32 undef, -8
> +  br i1 undef, label %b3, label %b2
> +
> +b2:                                               ; preds = %b2, %b1, %b0
> +  %v1 = phi i32 [ %v7, %b2 ], [ undef, %b0 ], [ %v0, %b1 ]
> +  %v2 = phi i32 [ %v1, %b2 ], [ %a0, %b0 ], [ undef, %b1 ]
> +  %v3 = add nsw i32 %v2, -2
> +  %v4 = getelementptr inbounds i16, i16* %a1, i32 %v3
> +  %v5 = load i16, i16* %v4, align 2, !tbaa !0
> +  %v6 = getelementptr inbounds i16, i16* %a1, i32 %v1
> +  store i16 %v5, i16* %v6, align 2, !tbaa !0
> +  %v7 = add nsw i32 %v1, -1
> +  %v8 = icmp sgt i32 %v7, 0
> +  br i1 %v8, label %b2, label %b3
> +
> +b3:                                               ; preds = %b2, %b1
> +  ret void
> +}
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv55" }
> +
> +!0 = !{!1, !1, i64 0}
> +!1 = !{!"short", !2, i64 0}
> +!2 = !{!"omnipotent char", !3, i64 0}
> +!3 = !{!"Simple C/C++ TBAA"}
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-rename.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-rename.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-rename.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-rename.ll Tue Mar 20 12:35:09 2018
> @@ -0,0 +1,30 @@
> +; RUN: llc -march=hexagon -enable-pipeliner < %s | FileCheck %s
> +
> +; A test that the Phi rewrite logic is correct.
> +
> +; CHECK: [[REG0:(r[0-9]+)]] = #0
> +; CHECK: loop0(.LBB0_[[LOOP:.]],
> +; CHECK: .LBB0_[[LOOP]]:
> +; CHECK: memh([[REG0]]+#0) = #0
> +
> +define void @f0() #0 {
> +b0:
> +  %v0 = add i32 undef, -4
> +  br label %b1
> +
> +b1:                                               ; preds = %b1, %b0
> +  %v1 = phi i16* [ %v4, %b1 ], [ null, %b0 ]
> +  %v2 = phi i32 [ %v5, %b1 ], [ 0, %b0 ]
> +  %v3 = getelementptr inbounds i16, i16* %v1, i32 1
> +  store i16 0, i16* %v1, align 2
> +  %v4 = getelementptr inbounds i16, i16* %v1, i32 2
> +  store i16 0, i16* %v3, align 2
> +  %v5 = add nsw i32 %v2, 8
> +  %v6 = icmp slt i32 %v5, %v0
> +  br i1 %v6, label %b1, label %b2
> +
> +b2:                                               ; preds = %b1
> +  ret void
> +}
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv55" }
>
> Added: llvm/trunk/test/CodeGen/Hexagon/swp-xxh2.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/swp-xxh2.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/swp-xxh2.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/swp-xxh2.ll Tue Mar 20 12:35:09 2018
> @@ -0,0 +1,57 @@
> +; RUN: llc -march=hexagon -enable-pipeliner -debug-only=pipeliner < %s -o
> - 2>&1 > /dev/null | FileCheck %s
> +; REQUIRES: asserts
> +
> +; Fix bug when pipelining xxh benchmark at O3, mv55, and with vectorization.
> +; The problem is choosing the correct name for the Phis in the epilog.
> +
> +; CHECK: New block
> +; CHECK: %{{.*}}, %[[REG:([0-9]+)]]{{.*}} = L2_loadri_pi
> +; CHECK: epilog:
> +; CHECK: = PHI
> +; CHECK-NOT: = PHI %{{[0-9]+}}, {{.*}}, %[[REG]]
> +; CHECK: = PHI
> +
> +; Function Attrs: nounwind
> +define void @f0(i32 %a0, i32* %a1) #0 {
> +b0:
> +  %v0 = ashr i32 %a0, 1
> +  br label %b1
> +
> +b1:                                               ; preds = %b1, %b0
> +  %v1 = phi i64 [ %v8, %b1 ], [ undef, %b0 ]
> +  %v2 = phi i32 [ %v9, %b1 ], [ 0, %b0 ]
> +  %v3 = phi i32 [ %v7, %b1 ], [ undef, %b0 ]
> +  %v4 = inttoptr i32 %v3 to i32*
> +  %v5 = load i32, i32* %v4, align 4, !tbaa !0
> +  %v6 = tail call i64 @llvm.hexagon.S2.packhl(i32 %v5, i32 undef)
> +  %v7 = add nsw i32 %v3, -16
> +  %v8 = tail call i64 @llvm.hexagon.M2.vdmacs.s0(i64 %v1, i64 undef, i64
> %v6)
> +  %v9 = add nsw i32 %v2, 1
> +  %v10 = icmp eq i32 %v9, %v0
> +  br i1 %v10, label %b2, label %b1
> +
> +b2:                                               ; preds = %b1
> +  %v11 = trunc i64 %v8 to i32
> +  %v12 = getelementptr inbounds i32, i32* %a1, i32 8
> +  store i32 %v11, i32* %v12, align 4, !tbaa !0
> +  call void @llvm.trap()
> +  unreachable
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare i64 @llvm.hexagon.M2.vdmacs.s0(i64, i64, i64) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i64 @llvm.hexagon.S2.packhl(i32, i32) #1
> +
> +; Function Attrs: noreturn nounwind
> +declare void @llvm.trap() #2
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv55" }
> +attributes #1 = { nounwind readnone }
> +attributes #2 = { noreturn nounwind }
> +
> +!0 = !{!1, !1, i64 0}
> +!1 = !{!"int", !2, i64 0}
> +!2 = !{!"omnipotent char", !3, i64 0}
> +!3 = !{!"Simple C/C++ TBAA"}
>
> Added: llvm/trunk/test/CodeGen/Hexagon/vect-downscale.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> CodeGen/Hexagon/vect-downscale.ll?rev=328023&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/CodeGen/Hexagon/vect-downscale.ll (added)
> +++ llvm/trunk/test/CodeGen/Hexagon/vect-downscale.ll Tue Mar 20 12:35:09
> 2018
> @@ -0,0 +1,177 @@
> +; RUN: llc -march=hexagon < %s | FileCheck %s
> +
> +; Make sure we generate a hardware loop and pipeline the inner loop using
> +; 4 packets, which is equivalent to the hand-coded version.
> +
> +; CHECK: loop0(.LBB0_[[LOOP:.]],
> +; CHECK: .LBB0_[[LOOP]]:
> +; CHECK: {
> +; CHECK: }
> +; CHECK: {
> +; CHECK: }
> +; CHECK: {
> +; CHECK: }
> +; CHECK: {
> +; CHECK-NOT: }
> +; CHECK: }{{[ \t]*}}:endloop0
> +
> +define void @f0(i8* noalias %a0, i32 %a1, i32 %a2, i32 %a3, i8* noalias
> nocapture %a4, i32 %a5, i32 %a6) #0 {
> +b0:
> +  %v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 8388736)
> +  %v1 = zext i32 %a3 to i64
> +  %v2 = shl nuw i64 %v1, 32
> +  %v3 = zext i32 %a1 to i64
> +  %v4 = shl nuw nsw i64 %v3, 16
> +  %v5 = or i64 %v4, %v2
> +  %v6 = or i64 %v5, 281474976710658
> +  tail call void asm sideeffect "    l2fetch($0, $1)\0A", "r,r"(i8* %a0,
> i64 %v6) #2, !srcloc !0
> +  %v7 = tail call i32 @llvm.hexagon.S2.ct0(i32 %a6)
> +  %v8 = add i32 %v7, 1
> +  %v9 = lshr i32 %a1, %v8
> +  %v10 = mul i32 %a6, 2
> +  %v11 = mul i32 %v10, %v9
> +  %v12 = sub i32 %a1, %v11
> +  %v13 = lshr i32 %v12, 1
> +  %v14 = tail call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 %v13)
> +  %v15 = icmp eq i32 %a2, 0
> +  br i1 %v15, label %b11, label %b1
> +
> +b1:                                               ; preds = %b0
> +  %v16 = mul i32 %a3, 2
> +  %v17 = icmp eq i32 %v9, 0
> +  %v18 = icmp eq i32 %v11, %a1
> +  %v19 = icmp ugt i32 %v12, %a6
> +  %v20 = mul i32 %v9, 64
> +  %v21 = getelementptr i8, i8* %a4, i32 %v20
> +  %v22 = mul i32 %v9, 128
> +  %v23 = add i32 %v22, %a3
> +  %v24 = getelementptr i8, i8* %a0, i32 %v23
> +  %v25 = getelementptr i8, i8* %a0, i32 %v22
> +  br label %b2
> +
> +b2:                                               ; preds = %b10, %b1
> +  %v26 = phi i8* [ %v25, %b1 ], [ %v90, %b10 ]
> +  %v27 = phi i8* [ %v24, %b1 ], [ %v89, %b10 ]
> +  %v28 = phi i8* [ %v21, %b1 ], [ %v88, %b10 ]
> +  %v29 = phi <16 x i32> [ undef, %b1 ], [ %v85, %b10 ]
> +  %v30 = phi <16 x i32> [ undef, %b1 ], [ %v84, %b10 ]
> +  %v31 = phi i8* [ %a0, %b1 ], [ %v86, %b10 ]
> +  %v32 = phi i8* [ %a4, %b1 ], [ %v87, %b10 ]
> +  %v33 = phi i32 [ 0, %b1 ], [ %v37, %b10 ]
> +  %v34 = bitcast i8* %v26 to <16 x i32>*
> +  %v35 = bitcast i8* %v27 to <16 x i32>*
> +  %v36 = bitcast i8* %v28 to <16 x i32>*
> +  %v37 = add nsw i32 %v33, 2
> +  %v38 = icmp ult i32 %v37, %a2
> +  br i1 %v38, label %b3, label %b4
> +
> +b3:                                               ; preds = %b2
> +  %v39 = getelementptr inbounds i8, i8* %v31, i32 %v16
> +  tail call void asm sideeffect "    l2fetch($0, $1)\0A", "r,r"(i8* %v39,
> i64 %v6) #2, !srcloc !1
> +  br label %b4
> +
> +b4:                                               ; preds = %b3, %b2
> +  %v40 = bitcast i8* %v32 to <16 x i32>*
> +  %v41 = bitcast i8* %v31 to <16 x i32>*
> +  %v42 = getelementptr inbounds i8, i8* %v31, i32 %a3
> +  %v43 = bitcast i8* %v42 to <16 x i32>*
> +  br i1 %v17, label %b6, label %b5
> +
> +b5:                                               ; preds = %b5, %b4
> +  %v44 = phi <16 x i32>* [ %v54, %b5 ], [ %v43, %b4 ]
> +  %v45 = phi <16 x i32>* [ %v52, %b5 ], [ %v41, %b4 ]
> +  %v46 = phi <16 x i32>* [ %v61, %b5 ], [ %v40, %b4 ]
> +  %v47 = phi i32 [ %v62, %b5 ], [ 0, %b4 ]
> +  %v48 = getelementptr inbounds <16 x i32>, <16 x i32>* %v45, i32 1
> +  %v49 = load <16 x i32>, <16 x i32>* %v45, align 64, !tbaa !2
> +  %v50 = getelementptr inbounds <16 x i32>, <16 x i32>* %v44, i32 1
> +  %v51 = load <16 x i32>, <16 x i32>* %v44, align 64, !tbaa !2
> +  %v52 = getelementptr inbounds <16 x i32>, <16 x i32>* %v45, i32 2
> +  %v53 = load <16 x i32>, <16 x i32>* %v48, align 64, !tbaa !2
> +  %v54 = getelementptr inbounds <16 x i32>, <16 x i32>* %v44, i32 2
> +  %v55 = load <16 x i32>, <16 x i32>* %v50, align 64, !tbaa !2
> +  %v56 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v0, <16 x i32> %v49, i32 1077952576)
> +  %v57 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v0, <16 x i32> %v53, i32 1077952576)
> +  %v58 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v56, <16 x i32> %v51, i32 1077952576)
> +  %v59 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v57, <16 x i32> %v55, i32 1077952576)
> +  %v60 = tail call <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32> %v59,
> <16 x i32> %v58)
> +  %v61 = getelementptr inbounds <16 x i32>, <16 x i32>* %v46, i32 1
> +  store <16 x i32> %v60, <16 x i32>* %v46, align 64, !tbaa !2
> +  %v62 = add nsw i32 %v47, 1
> +  %v63 = icmp eq i32 %v62, %v9
> +  br i1 %v63, label %b6, label %b5
> +
> +b6:                                               ; preds = %b5, %b4
> +  %v64 = phi <16 x i32> [ %v29, %b4 ], [ %v55, %b5 ]
> +  %v65 = phi <16 x i32> [ %v30, %b4 ], [ %v53, %b5 ]
> +  %v66 = phi <16 x i32>* [ %v43, %b4 ], [ %v35, %b5 ]
> +  %v67 = phi <16 x i32>* [ %v41, %b4 ], [ %v34, %b5 ]
> +  %v68 = phi <16 x i32>* [ %v40, %b4 ], [ %v36, %b5 ]
> +  br i1 %v18, label %b10, label %b7
> +
> +b7:                                               ; preds = %b6
> +  %v69 = load <16 x i32>, <16 x i32>* %v67, align 64, !tbaa !2
> +  %v70 = load <16 x i32>, <16 x i32>* %v66, align 64, !tbaa !2
> +  br i1 %v19, label %b8, label %b9
> +
> +b8:                                               ; preds = %b7
> +  %v71 = getelementptr inbounds <16 x i32>, <16 x i32>* %v66, i32 1
> +  %v72 = getelementptr inbounds <16 x i32>, <16 x i32>* %v67, i32 1
> +  %v73 = load <16 x i32>, <16 x i32>* %v72, align 64, !tbaa !2
> +  %v74 = load <16 x i32>, <16 x i32>* %v71, align 64, !tbaa !2
> +  br label %b9
> +
> +b9:                                               ; preds = %b8, %b7
> +  %v75 = phi <16 x i32> [ %v73, %b8 ], [ %v65, %b7 ]
> +  %v76 = phi <16 x i32> [ %v74, %b8 ], [ %v64, %b7 ]
> +  %v77 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v0, <16 x i32> %v69, i32 1077952576)
> +  %v78 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v0, <16 x i32> %v75, i32 1077952576)
> +  %v79 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v77, <16 x i32> %v70, i32 1077952576)
> +  %v80 = tail call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>
> %v78, <16 x i32> %v76, i32 1077952576)
> +  %v81 = tail call <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32> %v80,
> <16 x i32> %v79)
> +  %v82 = load <16 x i32>, <16 x i32>* %v68, align 64, !tbaa !2
> +  %v83 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v14, <16
> x i32> %v81, <16 x i32> %v82)
> +  store <16 x i32> %v83, <16 x i32>* %v68, align 64, !tbaa !2
> +  br label %b10
> +
> +b10:                                              ; preds = %b9, %b6
> +  %v84 = phi <16 x i32> [ %v75, %b9 ], [ %v65, %b6 ]
> +  %v85 = phi <16 x i32> [ %v76, %b9 ], [ %v64, %b6 ]
> +  %v86 = getelementptr inbounds i8, i8* %v31, i32 %v16
> +  %v87 = getelementptr inbounds i8, i8* %v32, i32 %a5
> +  %v88 = getelementptr i8, i8* %v28, i32 %a5
> +  %v89 = getelementptr i8, i8* %v27, i32 %v16
> +  %v90 = getelementptr i8, i8* %v26, i32 %v16
> +  br i1 %v38, label %b2, label %b11
> +
> +b11:                                              ; preds = %b10, %b0
> +  ret void
> +}
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare i32 @llvm.hexagon.S2.ct0(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>,
> i32) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32>, <16 x i32>) #1
> +
> +; Function Attrs: nounwind readnone
> +declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x
> i32>) #1
> +
> +attributes #0 = { nounwind "target-cpu"="hexagonv60"
> "target-features"="+hvxv60,+hvx-length64b" }
> +attributes #1 = { nounwind readnone }
> +attributes #2 = { nounwind }
> +
> +!0 = !{i32 -2146401371}
> +!1 = !{i32 -2146401153}
> +!2 = !{!3, !3, i64 0}
> +!3 = !{!"omnipotent char", !4, i64 0}
> +!4 = !{!"Simple C/C++ TBAA"}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180410/6bf91cde/attachment-0001.html>