[llvm] SWEDEV-414443 (PR #65947)
via llvm-commits
llvm-commits@lists.llvm.org
Mon Sep 11 03:58:07 PDT 2023
https://github.com/petar-avramovic created https://github.com/llvm/llvm-project/pull/65947
From 2dda14f6e55bac312b44f45da42a79c111498491 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@amd.com>
Date: Thu, 7 Sep 2023 15:25:34 +0200
Subject: [PATCH 1/2] MachineSink/AMDGPU: Add test for SWEDEV-414443
---
.../AMDGPU/machine-sink-swdev414443.mir | 4102 +++++++++++++++++
1 file changed, 4102 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir
new file mode 100644
index 00000000000000..84fc2a619a5c70
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir
@@ -0,0 +1,4102 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-sink -o - %s | FileCheck %s
+
+--- |
+ source_filename = "/work/mselehov/rocBLAS/library/src/blas2/rocblas_gemv_kernels.cpp"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+ target triple = "amdgcn-amd-amdhsa"
+
+ %llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds.t = type { [4096 x float] }
+ %llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EmffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds.t = type { [4096 x float] }
+
+ $_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil = comdat any
+
+ $_ZL20rocblas_gemvn_kernelILi64ELi16EmffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil = comdat any
+
+ @llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds = internal addrspace(3) global %llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds.t undef, align 16, !absolute_symbol !0
+ @llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EmffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds = internal addrspace(3) global %llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EmffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds.t undef, align 16, !absolute_symbol !0
+
+ define amdgpu_kernel void @_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil(i32 noundef %0, i32 noundef %1, float noundef %2, i64 noundef %3, ptr addrspace(1) nocapture noundef readonly %4, i64 noundef %5, i32 noundef %6, i64 noundef %7, ptr addrspace(1) nocapture noundef readonly %8, i64 noundef %9, i32 noundef %10, i64 noundef %11, float noundef %12, i64 noundef %13, ptr addrspace(1) nocapture noundef %14, i64 noundef %15, i32 noundef %16, i64 noundef %17) local_unnamed_addr #0 comdat {
+ %19 = tail call i32 @llvm.amdgcn.workgroup.id.x()
+ %20 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr(), !amdgpu.uniform !5
+ %21 = load <3 x i32>, ptr addrspace(4) %20, align 4, !tbaa !6
+ %22 = extractelement <3 x i32> %21, i32 0
+ %23 = extractelement <3 x i32> %21, i32 1
+ %24 = extractelement <3 x i32> %21, i32 2
+ %25 = icmp ult i32 %19, %22
+ %26 = select i1 %25, i64 6, i64 9
+ %27 = getelementptr inbounds i16, ptr addrspace(4) %20, i64 %26, !amdgpu.uniform !5
+ %28 = load i16, ptr addrspace(4) %27, align 2, !tbaa !10
+ %29 = zext i16 %28 to i32
+ %30 = tail call i32 @llvm.amdgcn.workgroup.id.y()
+ %31 = icmp ult i32 %30, %23
+ %32 = select i1 %31, i64 7, i64 10
+ %33 = getelementptr inbounds i16, ptr addrspace(4) %20, i64 %32, !amdgpu.uniform !5
+ %34 = load i16, ptr addrspace(4) %33, align 2, !tbaa !10
+ %35 = zext i16 %34 to i32
+ %36 = mul nuw i32 %35, %29
+ %37 = tail call i32 @llvm.amdgcn.workgroup.id.z()
+ %38 = icmp ult i32 %37, %24
+ %39 = select i1 %38, i64 8, i64 11
+ %40 = getelementptr inbounds i16, ptr addrspace(4) %20, i64 %39, !amdgpu.uniform !5
+ %41 = load i16, ptr addrspace(4) %40, align 2, !tbaa !10
+ %42 = zext i16 %41 to i32
+ %43 = mul i32 %36, %42
+ %.not = icmp eq i32 %43, 1024
+ br i1 %.not, label %44, label %_Z25rocblas_gemvn_kernel_calcILi64ELi16EiffLi0EEviiT3_PKT2_T1_S3_iS0_PS1_i.exit, !amdgpu.uniform !5
+
+ 44: ; preds = %18
+ %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment = call nonnull align 16 dereferenceable(392) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %.kernarg.offset65 = bitcast ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment to ptr addrspace(4), !amdgpu.uniform !5
+ %45 = load <3 x i32>, ptr addrspace(4) %.kernarg.offset65, align 16, !invariant.load !5
+ %.load3868 = extractelement <3 x i32> %45, i32 2
+ %46 = bitcast i32 %.load3868 to float
+ %.kernarg.offset55 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 88, !amdgpu.uniform !5
+ %.load56 = load float, ptr addrspace(4) %.kernarg.offset55, align 8, !invariant.load !5
+ %47 = fcmp contract oeq float %46, 0.000000e+00
+ %48 = fcmp contract oeq float %.load56, 1.000000e+00
+ %or.cond = and i1 %47, %48
+ %or.cond.inv = xor i1 %or.cond, true
+ br i1 %or.cond.inv, label %49, label %Flow97, !amdgpu.uniform !5
+
+ 49: ; preds = %44
+ %.load66 = extractelement <3 x i32> %45, i32 0
+ %.kernarg.offset57 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 104, !amdgpu.uniform !5
+ %50 = load <2 x i64>, ptr addrspace(4) %.kernarg.offset57, align 8, !invariant.load !5
+ %.load5874 = extractelement <2 x i64> %50, i32 0
+ %51 = inttoptr i64 %.load5874 to ptr addrspace(1)
+ %.load6075 = extractelement <2 x i64> %50, i32 1
+ %.kernarg.offset61 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 120, !amdgpu.uniform !5
+ %.load62 = load i32, ptr addrspace(4) %.kernarg.offset61, align 8, !invariant.load !5
+ %.kernarg.offset63 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 128, !amdgpu.uniform !5
+ %.load64 = load i64, ptr addrspace(4) %.kernarg.offset63, align 16, !invariant.load !5
+ %52 = sext i32 %30 to i64
+ %53 = mul nsw i64 %52, %.load64
+ %54 = getelementptr inbounds float, ptr addrspace(1) %51, i64 %53
+ %55 = getelementptr inbounds float, ptr addrspace(1) %54, i64 %.load6075
+ %56 = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !12, !noundef !5
+ %57 = tail call i32 @llvm.amdgcn.workitem.id.y(), !range !12, !noundef !5
+ %58 = call i32 @llvm.amdgcn.mul.u24(i32 %57, i32 %29)
+ %59 = add nuw nsw i32 %58, %56
+ %60 = fcmp contract oeq float %46, 0.000000e+00
+ br i1 %60, label %61, label %Flow90, !amdgpu.uniform !5
+
+ 61: ; preds = %49
+ %62 = icmp ult i32 %59, 256
+ %63 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %62)
+ %64 = extractvalue { i1, i64 } %63, 0
+ %65 = extractvalue { i1, i64 } %63, 1
+ br i1 %64, label %70, label %Flow91
+
+ Flow90: ; preds = %Flow91, %49
+ %66 = phi float [ %77, %Flow91 ], [ undef, %49 ]
+ %67 = phi i64 [ %78, %Flow91 ], [ undef, %49 ]
+ %68 = phi i1 [ %79, %Flow91 ], [ false, %49 ]
+ %69 = phi i1 [ false, %Flow91 ], [ true, %49 ]
+ br i1 %69, label %91, label %Flow93, !amdgpu.uniform !5
+
+ 70: ; preds = %61
+ %71 = shl i32 %19, 8
+ %72 = add nuw i32 %59, %71
+ %73 = icmp slt i32 %72, %.load66
+ %74 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %73)
+ %75 = extractvalue { i1, i64 } %74, 0
+ %76 = extractvalue { i1, i64 } %74, 1
+ br i1 %75, label %80, label %Flow92
+
+ Flow91: ; preds = %Flow92, %61
+ %77 = phi float [ %88, %Flow92 ], [ undef, %61 ]
+ %78 = phi i64 [ %89, %Flow92 ], [ undef, %61 ]
+ %79 = phi i1 [ %90, %Flow92 ], [ false, %61 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %65)
+ br label %Flow90, !amdgpu.uniform !5
+
+ 80: ; preds = %70
+ %81 = fcmp contract une float %.load56, 0.000000e+00
+ %82 = mul nsw i32 %72, %.load62
+ %83 = sext i32 %82 to i64
+ br i1 %81, label %84, label %Flow, !amdgpu.uniform !5
+
+ 84: ; preds = %80
+ %85 = getelementptr inbounds float, ptr addrspace(1) %55, i64 %83
+ %86 = load float, ptr addrspace(1) %85, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %87 = fmul contract float %86, %.load56
+ br label %Flow, !amdgpu.uniform !5
+
+ Flow92: ; preds = %Flow, %70
+ %88 = phi float [ %511, %Flow ], [ undef, %70 ]
+ %89 = phi i64 [ %83, %Flow ], [ undef, %70 ]
+ %90 = phi i1 [ true, %Flow ], [ false, %70 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %76)
+ br label %Flow91, !amdgpu.uniform !5
+
+ 91: ; preds = %Flow90
+ %.kernarg.offset39 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 24, !amdgpu.uniform !5
+ %92 = load <2 x i64>, ptr addrspace(4) %.kernarg.offset39, align 8, !invariant.load !5
+ %.load4069 = extractelement <2 x i64> %92, i32 0
+ %93 = inttoptr i64 %.load4069 to ptr addrspace(1)
+ %.load4270 = extractelement <2 x i64> %92, i32 1
+ %.kernarg.offset45 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 48, !amdgpu.uniform !5
+ %94 = load <3 x i64>, ptr addrspace(4) %.kernarg.offset45, align 16, !invariant.load !5
+ %.load4671 = extractelement <3 x i64> %94, i32 0
+ %.load4872 = extractelement <3 x i64> %94, i32 1
+ %95 = inttoptr i64 %.load4872 to ptr addrspace(1)
+ %.load5073 = extractelement <3 x i64> %94, i32 2
+ %.kernarg.offset53 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 80, !amdgpu.uniform !5
+ %.load54 = load i64, ptr addrspace(4) %.kernarg.offset53, align 16, !invariant.load !5
+ %96 = mul nsw i64 %52, %.load4671
+ %97 = getelementptr inbounds float, ptr addrspace(1) %93, i64 %96
+ %98 = getelementptr inbounds float, ptr addrspace(1) %97, i64 %.load4270
+ %99 = mul i64 %52, %.load54
+ %100 = getelementptr inbounds float, ptr addrspace(1) %95, i64 %99
+ %101 = getelementptr inbounds float, ptr addrspace(1) %100, i64 %.load5073
+ %.load3667 = extractelement <3 x i32> %45, i32 1
+ %.kernarg.offset43 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 40, !amdgpu.uniform !5
+ %.load44 = load i32, ptr addrspace(4) %.kernarg.offset43, align 8, !invariant.load !5
+ %.kernarg.offset51 = getelementptr inbounds i8, ptr addrspace(4) %_ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.kernarg.segment, i64 72, !amdgpu.uniform !5
+ %.load52 = load i32, ptr addrspace(4) %.kernarg.offset51, align 8, !invariant.load !5
+ %102 = shl i32 %19, 8
+ %103 = add i32 %102, %56
+ %104 = srem i32 %.load3667, 64
+ %105 = shl nuw nsw i32 %57, 2
+ %106 = sub nsw i32 %.load3667, %104
+ %107 = icmp slt i32 %105, %106
+ %108 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %107)
+ %109 = extractvalue { i1, i64 } %108, 0
+ %110 = extractvalue { i1, i64 } %108, 1
+ br i1 %109, label %.lr.ph.i, label %Flow89
+
+ .lr.ph.i: ; preds = %91
+ %111 = icmp slt i32 %103, %.load66
+ %112 = add nsw i32 %103, 64
+ %113 = icmp slt i32 %112, %.load66
+ %114 = add nsw i32 %103, 128
+ %115 = icmp slt i32 %114, %.load66
+ %116 = add nsw i32 %103, 192
+ %117 = icmp slt i32 %116, %.load66
+ %118 = add nuw nsw i32 %105, 1
+ %119 = mul i32 %.load44, %118
+ %120 = shl i32 %.load44, 6
+ %121 = add nuw nsw i32 %105, 2
+ %122 = mul i32 %.load44, %121
+ %123 = add nuw nsw i32 %105, 3
+ %124 = mul i32 %.load44, %123
+ %125 = mul i32 %57, %.load44
+ %126 = shl i32 %125, 2
+ %127 = mul i32 %.load52, %118
+ %128 = shl i32 %.load52, 6
+ %129 = mul i32 %.load52, %121
+ %130 = mul i32 %.load52, %123
+ %131 = shl i64 %99, 2
+ %132 = shl i64 %.load5073, 2
+ %133 = add i64 %131, %132
+ %134 = mul i32 %57, %.load52
+ %135 = shl i32 %134, 2
+ %136 = sext i32 %135 to i64
+ %137 = shl nsw i64 %136, 2
+ %138 = add i64 %133, %137
+ %scevgep = getelementptr i8, ptr addrspace(1) %95, i64 %138
+ %139 = sext i32 %128 to i64
+ %140 = shl nsw i64 %139, 2
+ br label %146, !amdgpu.uniform !5
+
+ Flow89: ; preds = %Flow88, %91
+ %141 = phi i32 [ %.lcssa98, %Flow88 ], [ %105, %91 ]
+ %142 = phi float [ %.sroa.095.1.i.lcssa, %Flow88 ], [ 0.000000e+00, %91 ]
+ %143 = phi float [ %.sroa.20.1.i.lcssa, %Flow88 ], [ 0.000000e+00, %91 ]
+ %144 = phi float [ %.sroa.38.1.i.lcssa, %Flow88 ], [ 0.000000e+00, %91 ]
+ %145 = phi float [ %.sroa.56.1.i.lcssa, %Flow88 ], [ 0.000000e+00, %91 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %110)
+ br label %._crit_edge.i, !amdgpu.uniform !5
+
+ 146: ; preds = %.lr.ph.i, %254
+ %phi.broken = phi i64 [ 0, %.lr.ph.i ], [ %257, %254 ]
+ %lsr.iv33 = phi ptr addrspace(1) [ %scevgep34, %254 ], [ %scevgep, %.lr.ph.i ]
+ %lsr.iv31 = phi i32 [ %lsr.iv.next32, %254 ], [ 0, %.lr.ph.i ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %254 ], [ %103, %.lr.ph.i ]
+ %.0318342.i = phi i32 [ %255, %254 ], [ %105, %.lr.ph.i ]
+ %.sroa.095.0341.i = phi float [ %.sroa.095.1.i, %254 ], [ 0.000000e+00, %.lr.ph.i ]
+ %.sroa.20.0340.i = phi float [ %.sroa.20.1.i, %254 ], [ 0.000000e+00, %.lr.ph.i ]
+ %.sroa.38.0339.i = phi float [ %.sroa.38.1.i, %254 ], [ 0.000000e+00, %.lr.ph.i ]
+ %.sroa.56.0338.i = phi float [ %.sroa.56.1.i, %254 ], [ 0.000000e+00, %.lr.ph.i ]
+ %147 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %111)
+ %148 = extractvalue { i1, i64 } %147, 0
+ %149 = extractvalue { i1, i64 } %147, 1
+ br i1 %148, label %150, label %254
+
+ 150: ; preds = %146
+ %151 = load float, ptr addrspace(1) %lsr.iv33, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %152 = add i32 %127, %lsr.iv31
+ %153 = sext i32 %152 to i64
+ %154 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %153
+ %155 = load float, ptr addrspace(1) %154, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %156 = add i32 %129, %lsr.iv31
+ %157 = sext i32 %156 to i64
+ %158 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %157
+ %159 = load float, ptr addrspace(1) %158, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %160 = add i32 %130, %lsr.iv31
+ %161 = sext i32 %160 to i64
+ %162 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %161
+ %163 = load float, ptr addrspace(1) %162, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %164 = add i32 %126, %lsr.iv
+ %165 = sext i32 %164 to i64
+ %166 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %165
+ %167 = load float, ptr addrspace(1) %166, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %168 = fmul contract float %151, %167
+ %169 = fadd contract float %.sroa.095.0341.i, %168
+ %170 = add i32 %119, %lsr.iv
+ %171 = sext i32 %170 to i64
+ %172 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %171
+ %173 = load float, ptr addrspace(1) %172, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %174 = fmul contract float %155, %173
+ %175 = fadd contract float %169, %174
+ %176 = add i32 %122, %lsr.iv
+ %177 = sext i32 %176 to i64
+ %178 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %177
+ %179 = load float, ptr addrspace(1) %178, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %180 = fmul contract float %159, %179
+ %181 = fadd contract float %175, %180
+ %182 = add i32 %124, %lsr.iv
+ %183 = sext i32 %182 to i64
+ %184 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %183
+ %185 = load float, ptr addrspace(1) %184, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %186 = fmul contract float %163, %185
+ %187 = fadd contract float %181, %186
+ %188 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %113)
+ %189 = extractvalue { i1, i64 } %188, 0
+ %190 = extractvalue { i1, i64 } %188, 1
+ br i1 %189, label %191, label %Flow87
+
+ 191: ; preds = %150
+ %192 = getelementptr inbounds float, ptr addrspace(1) %166, i64 64
+ %193 = load float, ptr addrspace(1) %192, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %194 = fmul contract float %151, %193
+ %195 = fadd contract float %.sroa.20.0340.i, %194
+ %196 = getelementptr inbounds float, ptr addrspace(1) %172, i64 64
+ %197 = load float, ptr addrspace(1) %196, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %198 = fmul contract float %155, %197
+ %199 = fadd contract float %195, %198
+ %200 = getelementptr inbounds float, ptr addrspace(1) %178, i64 64
+ %201 = load float, ptr addrspace(1) %200, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %202 = fmul contract float %159, %201
+ %203 = fadd contract float %199, %202
+ %204 = getelementptr inbounds float, ptr addrspace(1) %184, i64 64
+ %205 = load float, ptr addrspace(1) %204, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %206 = fmul contract float %163, %205
+ %207 = fadd contract float %203, %206
+ %208 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %115)
+ %209 = extractvalue { i1, i64 } %208, 0
+ %210 = extractvalue { i1, i64 } %208, 1
+ br i1 %209, label %211, label %Flow86
+
+ 211: ; preds = %191
+ %212 = getelementptr inbounds float, ptr addrspace(1) %166, i64 128
+ %213 = load float, ptr addrspace(1) %212, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %214 = fmul contract float %151, %213
+ %215 = fadd contract float %.sroa.38.0339.i, %214
+ %216 = getelementptr inbounds float, ptr addrspace(1) %172, i64 128
+ %217 = load float, ptr addrspace(1) %216, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %218 = fmul contract float %155, %217
+ %219 = fadd contract float %215, %218
+ %220 = getelementptr inbounds float, ptr addrspace(1) %178, i64 128
+ %221 = load float, ptr addrspace(1) %220, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %222 = fmul contract float %159, %221
+ %223 = fadd contract float %219, %222
+ %224 = getelementptr inbounds float, ptr addrspace(1) %184, i64 128
+ %225 = load float, ptr addrspace(1) %224, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %226 = fmul contract float %163, %225
+ %227 = fadd contract float %223, %226
+ %228 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %117)
+ %229 = extractvalue { i1, i64 } %228, 0
+ %230 = extractvalue { i1, i64 } %228, 1
+ br i1 %229, label %231, label %Flow85
+
+ 231: ; preds = %211
+ %232 = getelementptr inbounds float, ptr addrspace(1) %166, i64 192
+ %233 = load float, ptr addrspace(1) %232, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %234 = fmul contract float %151, %233
+ %235 = fadd contract float %.sroa.56.0338.i, %234
+ %236 = getelementptr inbounds float, ptr addrspace(1) %172, i64 192
+ %237 = load float, ptr addrspace(1) %236, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %238 = fmul contract float %155, %237
+ %239 = fadd contract float %235, %238
+ %240 = getelementptr inbounds float, ptr addrspace(1) %178, i64 192
+ %241 = load float, ptr addrspace(1) %240, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %242 = fmul contract float %159, %241
+ %243 = fadd contract float %239, %242
+ %244 = getelementptr inbounds float, ptr addrspace(1) %184, i64 192
+ %245 = load float, ptr addrspace(1) %244, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %246 = fmul contract float %163, %245
+ %247 = fadd contract float %243, %246
+ br label %Flow85, !amdgpu.uniform !5
+
+ Flow85: ; preds = %231, %211
+ %248 = phi float [ %247, %231 ], [ %.sroa.56.0338.i, %211 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %230)
+ br label %Flow86, !amdgpu.uniform !5
+
+ Flow86: ; preds = %Flow85, %191
+ %249 = phi float [ %227, %Flow85 ], [ %.sroa.38.0339.i, %191 ]
+ %250 = phi float [ %248, %Flow85 ], [ %.sroa.56.0338.i, %191 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %210)
+ br label %Flow87, !amdgpu.uniform !5
+
+ Flow87: ; preds = %Flow86, %150
+ %251 = phi float [ %207, %Flow86 ], [ %.sroa.20.0340.i, %150 ]
+ %252 = phi float [ %249, %Flow86 ], [ %.sroa.38.0339.i, %150 ]
+ %253 = phi float [ %250, %Flow86 ], [ %.sroa.56.0338.i, %150 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %190)
+ br label %254, !amdgpu.uniform !5
+
+ 254: ; preds = %146, %Flow87
+ %.sroa.56.1.i = phi float [ %.sroa.56.0338.i, %146 ], [ %253, %Flow87 ]
+ %.sroa.38.1.i = phi float [ %.sroa.38.0339.i, %146 ], [ %252, %Flow87 ]
+ %.sroa.20.1.i = phi float [ %.sroa.20.0340.i, %146 ], [ %251, %Flow87 ]
+ %.sroa.095.1.i = phi float [ %.sroa.095.0341.i, %146 ], [ %187, %Flow87 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %149)
+ %255 = add nuw nsw i32 %.0318342.i, 64
+ %lsr.iv.next = add i32 %lsr.iv, %120
+ %lsr.iv.next32 = add i32 %lsr.iv31, %128
+ %scevgep34 = getelementptr i8, ptr addrspace(1) %lsr.iv33, i64 %140
+ %256 = icmp sge i32 %255, %106
+ %257 = call i64 @llvm.amdgcn.if.break.i64(i1 %256, i64 %phi.broken)
+ %258 = call i1 @llvm.amdgcn.loop.i64(i64 %257)
+ br i1 %258, label %Flow88, label %146
+
+ Flow88: ; preds = %254
+ %.sroa.56.1.i.lcssa = phi float [ %.sroa.56.1.i, %254 ]
+ %.sroa.38.1.i.lcssa = phi float [ %.sroa.38.1.i, %254 ]
+ %.sroa.20.1.i.lcssa = phi float [ %.sroa.20.1.i, %254 ]
+ %.sroa.095.1.i.lcssa = phi float [ %.sroa.095.1.i, %254 ]
+ %.lcssa98 = phi i32 [ %255, %254 ]
+ %.lcssa = phi i64 [ %257, %254 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %.lcssa)
+ br label %Flow89, !amdgpu.uniform !5
+
+ Flow93: ; preds = %Flow94, %Flow90
+ %259 = phi float [ %498, %Flow94 ], [ %66, %Flow90 ]
+ %260 = phi i64 [ %499, %Flow94 ], [ %67, %Flow90 ]
+ %261 = phi i1 [ %500, %Flow94 ], [ %68, %Flow90 ]
+ %262 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %261)
+ %263 = extractvalue { i1, i64 } %262, 0
+ %264 = extractvalue { i1, i64 } %262, 1
+ br i1 %263, label %.sink.split.i, label %Flow96
+
+ ._crit_edge.i: ; preds = %Flow89
+ %265 = icmp sgt i32 %104, 0
+ br i1 %265, label %266, label %Flow84, !amdgpu.uniform !5
+
+ 266: ; preds = %._crit_edge.i
+ %267 = icmp slt i32 %141, %.load3667
+ %268 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %267)
+ %269 = extractvalue { i1, i64 } %268, 0
+ %270 = extractvalue { i1, i64 } %268, 1
+ br i1 %269, label %271, label %316
+
+ 271: ; preds = %266
+ %272 = mul nsw i32 %141, %.load52
+ %273 = sext i32 %272 to i64
+ %274 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %273
+ %275 = load float, ptr addrspace(1) %274, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %276 = or i32 %141, 1
+ %277 = icmp slt i32 %276, %.load3667
+ %278 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %277)
+ %279 = extractvalue { i1, i64 } %278, 0
+ %280 = extractvalue { i1, i64 } %278, 1
+ br i1 %279, label %281, label %Flow83
+
+ 281: ; preds = %271
+ %282 = mul nsw i32 %276, %.load52
+ %283 = sext i32 %282 to i64
+ %284 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %283
+ %285 = load float, ptr addrspace(1) %284, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %286 = or i32 %141, 2
+ %287 = icmp slt i32 %286, %.load3667
+ %288 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %287)
+ %289 = extractvalue { i1, i64 } %288, 0
+ %290 = extractvalue { i1, i64 } %288, 1
+ br i1 %289, label %291, label %Flow82
+
+ 291: ; preds = %281
+ %292 = mul nsw i32 %286, %.load52
+ %293 = sext i32 %292 to i64
+ %294 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %293
+ %295 = load float, ptr addrspace(1) %294, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %296 = or i32 %141, 3
+ %297 = icmp slt i32 %296, %.load3667
+ %298 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %297)
+ %299 = extractvalue { i1, i64 } %298, 0
+ %300 = extractvalue { i1, i64 } %298, 1
+ br i1 %299, label %301, label %Flow81
+
+ 301: ; preds = %291
+ %302 = mul nsw i32 %296, %.load52
+ %303 = sext i32 %302 to i64
+ %304 = getelementptr inbounds float, ptr addrspace(1) %101, i64 %303
+ %305 = load float, ptr addrspace(1) %304, align 4, !tbaa !13, !amdgpu.noclobber !5
+ br label %Flow81, !amdgpu.uniform !5
+
+ Flow81: ; preds = %301, %291
+ %306 = phi float [ %305, %301 ], [ 0.000000e+00, %291 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %300)
+ br label %Flow82, !amdgpu.uniform !5
+
+ Flow82: ; preds = %Flow81, %281
+ %307 = phi float [ %295, %Flow81 ], [ 0.000000e+00, %281 ]
+ %308 = phi float [ %306, %Flow81 ], [ 0.000000e+00, %281 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %290)
+ br label %Flow83, !amdgpu.uniform !5
+
+ Flow83: ; preds = %Flow82, %271
+ %309 = phi float [ %285, %Flow82 ], [ 0.000000e+00, %271 ]
+ %310 = phi float [ %307, %Flow82 ], [ 0.000000e+00, %271 ]
+ %311 = phi float [ %308, %Flow82 ], [ 0.000000e+00, %271 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %280)
+ br label %316, !amdgpu.uniform !5
+
+ Flow84: ; preds = %Flow80, %._crit_edge.i
+ %312 = phi float [ %432, %Flow80 ], [ %142, %._crit_edge.i ]
+ %313 = phi float [ %433, %Flow80 ], [ %143, %._crit_edge.i ]
+ %314 = phi float [ %434, %Flow80 ], [ %144, %._crit_edge.i ]
+ %315 = phi float [ %435, %Flow80 ], [ %145, %._crit_edge.i ]
+ br label %436, !amdgpu.uniform !5
+
+ 316: ; preds = %266, %Flow83
+ %.sroa.35.0.i = phi float [ 0.000000e+00, %266 ], [ %311, %Flow83 ]
+ %.sroa.24.0.i = phi float [ 0.000000e+00, %266 ], [ %310, %Flow83 ]
+ %.sroa.13.0.i = phi float [ 0.000000e+00, %266 ], [ %309, %Flow83 ]
+ %.sroa.0.0.i = phi float [ 0.000000e+00, %266 ], [ %275, %Flow83 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %270)
+ %317 = icmp slt i32 %103, %.load66
+ %318 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %317)
+ %319 = extractvalue { i1, i64 } %318, 0
+ %320 = extractvalue { i1, i64 } %318, 1
+ br i1 %319, label %321, label %Flow80
+
+ 321: ; preds = %316
+ %322 = mul nsw i32 %141, %.load44
+ %323 = select i1 %267, i32 %322, i32 0
+ %324 = add nsw i32 %323, %103
+ %325 = sext i32 %324 to i64
+ %326 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %325
+ %327 = load float, ptr addrspace(1) %326, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %328 = fmul contract float %.sroa.0.0.i, %327
+ %329 = fadd contract float %142, %328
+ %330 = or i32 %141, 1
+ %331 = mul nsw i32 %330, %.load44
+ %332 = icmp slt i32 %330, %.load3667
+ %333 = select i1 %332, i32 %331, i32 0
+ %334 = add nsw i32 %333, %103
+ %335 = sext i32 %334 to i64
+ %336 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %335
+ %337 = load float, ptr addrspace(1) %336, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %338 = fmul contract float %.sroa.13.0.i, %337
+ %339 = fadd contract float %329, %338
+ %340 = or i32 %141, 2
+ %341 = mul nsw i32 %340, %.load44
+ %342 = icmp slt i32 %340, %.load3667
+ %343 = select i1 %342, i32 %341, i32 0
+ %344 = add nsw i32 %343, %103
+ %345 = sext i32 %344 to i64
+ %346 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %345
+ %347 = load float, ptr addrspace(1) %346, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %348 = fmul contract float %.sroa.24.0.i, %347
+ %349 = fadd contract float %339, %348
+ %350 = or i32 %141, 3
+ %351 = mul nsw i32 %350, %.load44
+ %352 = icmp slt i32 %350, %.load3667
+ %353 = select i1 %352, i32 %351, i32 0
+ %354 = add nsw i32 %353, %103
+ %355 = sext i32 %354 to i64
+ %356 = getelementptr inbounds float, ptr addrspace(1) %98, i64 %355
+ %357 = load float, ptr addrspace(1) %356, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %358 = fmul contract float %.sroa.35.0.i, %357
+ %359 = fadd contract float %349, %358
+ %360 = add nsw i32 %103, 64
+ %361 = icmp slt i32 %360, %.load66
+ %362 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %361)
+ %363 = extractvalue { i1, i64 } %362, 0
+ %364 = extractvalue { i1, i64 } %362, 1
+ br i1 %363, label %365, label %Flow79
+
+ 365: ; preds = %321
+ %366 = getelementptr inbounds float, ptr addrspace(1) %326, i64 64
+ %367 = load float, ptr addrspace(1) %366, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %368 = fmul contract float %.sroa.0.0.i, %367
+ %369 = fadd contract float %143, %368
+ %370 = getelementptr inbounds float, ptr addrspace(1) %336, i64 64
+ %371 = load float, ptr addrspace(1) %370, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %372 = fmul contract float %.sroa.13.0.i, %371
+ %373 = fadd contract float %369, %372
+ %374 = getelementptr inbounds float, ptr addrspace(1) %346, i64 64
+ %375 = load float, ptr addrspace(1) %374, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %376 = fmul contract float %.sroa.24.0.i, %375
+ %377 = fadd contract float %373, %376
+ %378 = getelementptr inbounds float, ptr addrspace(1) %356, i64 64
+ %379 = load float, ptr addrspace(1) %378, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %380 = fmul contract float %.sroa.35.0.i, %379
+ %381 = fadd contract float %377, %380
+ %382 = add nsw i32 %103, 128
+ %383 = icmp slt i32 %382, %.load66
+ %384 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %383)
+ %385 = extractvalue { i1, i64 } %384, 0
+ %386 = extractvalue { i1, i64 } %384, 1
+ br i1 %385, label %387, label %Flow78
+
+ 387: ; preds = %365
+ %388 = getelementptr inbounds float, ptr addrspace(1) %326, i64 128
+ %389 = load float, ptr addrspace(1) %388, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %390 = fmul contract float %.sroa.0.0.i, %389
+ %391 = fadd contract float %144, %390
+ %392 = getelementptr inbounds float, ptr addrspace(1) %336, i64 128
+ %393 = load float, ptr addrspace(1) %392, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %394 = fmul contract float %.sroa.13.0.i, %393
+ %395 = fadd contract float %391, %394
+ %396 = getelementptr inbounds float, ptr addrspace(1) %346, i64 128
+ %397 = load float, ptr addrspace(1) %396, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %398 = fmul contract float %.sroa.24.0.i, %397
+ %399 = fadd contract float %395, %398
+ %400 = getelementptr inbounds float, ptr addrspace(1) %356, i64 128
+ %401 = load float, ptr addrspace(1) %400, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %402 = fmul contract float %.sroa.35.0.i, %401
+ %403 = fadd contract float %399, %402
+ %404 = add nsw i32 %103, 192
+ %405 = icmp slt i32 %404, %.load66
+ %406 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %405)
+ %407 = extractvalue { i1, i64 } %406, 0
+ %408 = extractvalue { i1, i64 } %406, 1
+ br i1 %407, label %409, label %Flow77
+
+ 409: ; preds = %387
+ %410 = getelementptr inbounds float, ptr addrspace(1) %326, i64 192
+ %411 = load float, ptr addrspace(1) %410, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %412 = fmul contract float %.sroa.0.0.i, %411
+ %413 = fadd contract float %145, %412
+ %414 = getelementptr inbounds float, ptr addrspace(1) %336, i64 192
+ %415 = load float, ptr addrspace(1) %414, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %416 = fmul contract float %.sroa.13.0.i, %415
+ %417 = fadd contract float %413, %416
+ %418 = getelementptr inbounds float, ptr addrspace(1) %346, i64 192
+ %419 = load float, ptr addrspace(1) %418, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %420 = fmul contract float %.sroa.24.0.i, %419
+ %421 = fadd contract float %417, %420
+ %422 = getelementptr inbounds float, ptr addrspace(1) %356, i64 192
+ %423 = load float, ptr addrspace(1) %422, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %424 = fmul contract float %.sroa.35.0.i, %423
+ %425 = fadd contract float %421, %424
+ br label %Flow77, !amdgpu.uniform !5
+
+ Flow77: ; preds = %409, %387
+ %426 = phi float [ %425, %409 ], [ %145, %387 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %408)
+ br label %Flow78, !amdgpu.uniform !5
+
+ Flow78: ; preds = %Flow77, %365
+ %427 = phi float [ %403, %Flow77 ], [ %144, %365 ]
+ %428 = phi float [ %426, %Flow77 ], [ %145, %365 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %386)
+ br label %Flow79, !amdgpu.uniform !5
+
+ Flow79: ; preds = %Flow78, %321
+ %429 = phi float [ %381, %Flow78 ], [ %143, %321 ]
+ %430 = phi float [ %427, %Flow78 ], [ %144, %321 ]
+ %431 = phi float [ %428, %Flow78 ], [ %145, %321 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %364)
+ br label %Flow80, !amdgpu.uniform !5
+
+ Flow80: ; preds = %Flow79, %316
+ %432 = phi float [ %359, %Flow79 ], [ %142, %316 ]
+ %433 = phi float [ %429, %Flow79 ], [ %143, %316 ]
+ %434 = phi float [ %430, %Flow79 ], [ %144, %316 ]
+ %435 = phi float [ %431, %Flow79 ], [ %145, %316 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %320)
+ br label %Flow84, !amdgpu.uniform !5
+
+ 436: ; preds = %Flow84
+ %437 = shl nuw nsw i32 %57, 8
+ %438 = add nuw nsw i32 %437, %56
+ %439 = getelementptr inbounds [4096 x float], ptr addrspace(3) @llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds, i32 0, i32 %438
+ store float %312, ptr addrspace(3) %439, align 4, !tbaa !13
+ %440 = getelementptr inbounds float, ptr addrspace(3) %439, i32 64
+ store float %313, ptr addrspace(3) %440, align 4, !tbaa !13
+ %441 = getelementptr inbounds float, ptr addrspace(3) %439, i32 128
+ store float %314, ptr addrspace(3) %441, align 4, !tbaa !13
+ %442 = getelementptr inbounds float, ptr addrspace(3) %439, i32 192
+ store float %315, ptr addrspace(3) %442, align 4, !tbaa !13
+ fence syncscope("workgroup") release
+ tail call void @llvm.amdgcn.s.barrier()
+ fence syncscope("workgroup") acquire
+ %443 = icmp ult i32 %59, 256
+ %444 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %443)
+ %445 = extractvalue { i1, i64 } %444, 0
+ %446 = extractvalue { i1, i64 } %444, 1
+ br i1 %445, label %.preheader.i, label %Flow94
+
+ .preheader.i: ; preds = %436
+ %447 = getelementptr inbounds [4096 x float], ptr addrspace(3) @llvm.amdgcn.kernel._ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil.lds, i32 0, i32 %59
+ %.promoted.i = load float, ptr addrspace(3) %447, align 4, !tbaa !13
+ %448 = getelementptr inbounds float, ptr addrspace(3) %447, i32 256
+ %449 = load float, ptr addrspace(3) %448, align 4, !tbaa !13
+ %450 = fadd contract float %.promoted.i, %449
+ %451 = getelementptr inbounds float, ptr addrspace(3) %447, i32 512
+ %452 = load float, ptr addrspace(3) %451, align 4, !tbaa !13
+ %453 = fadd contract float %452, %450
+ %454 = getelementptr inbounds float, ptr addrspace(3) %447, i32 768
+ %455 = load float, ptr addrspace(3) %454, align 4, !tbaa !13
+ %456 = fadd contract float %455, %453
+ %457 = getelementptr inbounds float, ptr addrspace(3) %447, i32 1024
+ %458 = load float, ptr addrspace(3) %457, align 4, !tbaa !13
+ %459 = fadd contract float %458, %456
+ %460 = getelementptr inbounds float, ptr addrspace(3) %447, i32 1280
+ %461 = load float, ptr addrspace(3) %460, align 4, !tbaa !13
+ %462 = fadd contract float %461, %459
+ %463 = getelementptr inbounds float, ptr addrspace(3) %447, i32 1536
+ %464 = load float, ptr addrspace(3) %463, align 4, !tbaa !13
+ %465 = fadd contract float %464, %462
+ %466 = getelementptr inbounds float, ptr addrspace(3) %447, i32 1792
+ %467 = load float, ptr addrspace(3) %466, align 4, !tbaa !13
+ %468 = fadd contract float %467, %465
+ %469 = getelementptr inbounds float, ptr addrspace(3) %447, i32 2048
+ %470 = load float, ptr addrspace(3) %469, align 4, !tbaa !13
+ %471 = fadd contract float %470, %468
+ %472 = getelementptr inbounds float, ptr addrspace(3) %447, i32 2304
+ %473 = load float, ptr addrspace(3) %472, align 4, !tbaa !13
+ %474 = fadd contract float %473, %471
+ %475 = getelementptr inbounds float, ptr addrspace(3) %447, i32 2560
+ %476 = load float, ptr addrspace(3) %475, align 4, !tbaa !13
+ %477 = fadd contract float %476, %474
+ %478 = getelementptr inbounds float, ptr addrspace(3) %447, i32 2816
+ %479 = load float, ptr addrspace(3) %478, align 4, !tbaa !13
+ %480 = fadd contract float %479, %477
+ %481 = getelementptr inbounds float, ptr addrspace(3) %447, i32 3072
+ %482 = load float, ptr addrspace(3) %481, align 4, !tbaa !13
+ %483 = fadd contract float %482, %480
+ %484 = getelementptr inbounds float, ptr addrspace(3) %447, i32 3328
+ %485 = load float, ptr addrspace(3) %484, align 4, !tbaa !13
+ %486 = fadd contract float %485, %483
+ %487 = getelementptr inbounds float, ptr addrspace(3) %447, i32 3584
+ %488 = load float, ptr addrspace(3) %487, align 4, !tbaa !13
+ %489 = fadd contract float %488, %486
+ %490 = getelementptr inbounds float, ptr addrspace(3) %447, i32 3840
+ %491 = load float, ptr addrspace(3) %490, align 4, !tbaa !13
+ %492 = fadd contract float %491, %489
+ store float %492, ptr addrspace(3) %447, align 4, !tbaa !13
+ %493 = add i32 %59, %102
+ %494 = icmp slt i32 %493, %.load66
+ %495 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %494)
+ %496 = extractvalue { i1, i64 } %495, 0
+ %497 = extractvalue { i1, i64 } %495, 1
+ br i1 %496, label %501, label %Flow95
+
+ Flow94: ; preds = %Flow95, %436
+ %498 = phi float [ %513, %Flow95 ], [ undef, %436 ]
+ %499 = phi i64 [ %514, %Flow95 ], [ undef, %436 ]
+ %500 = phi i1 [ %515, %Flow95 ], [ %68, %436 ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %446)
+ br label %Flow93, !amdgpu.uniform !5
+
+ 501: ; preds = %.preheader.i
+ %502 = fcmp contract une float %.load56, 0.000000e+00
+ %503 = fmul contract float %492, %46
+ %504 = mul nsw i32 %493, %.load62
+ %505 = sext i32 %504 to i64
+ br i1 %502, label %506, label %Flow76, !amdgpu.uniform !5
+
+ 506: ; preds = %501
+ %507 = getelementptr inbounds float, ptr addrspace(1) %55, i64 %505
+ %508 = load float, ptr addrspace(1) %507, align 4, !tbaa !13, !amdgpu.noclobber !5
+ %509 = fmul contract float %508, %.load56
+ %510 = fadd contract float %503, %509
+ br label %Flow76, !amdgpu.uniform !5
+
+ Flow: ; preds = %84, %80
+ %511 = phi float [ %87, %84 ], [ 0.000000e+00, %80 ]
+ br label %Flow92, !amdgpu.uniform !5
+
+ Flow76: ; preds = %506, %501
+ %512 = phi float [ %510, %506 ], [ %503, %501 ]
+ br label %Flow95, !amdgpu.uniform !5
+
+ Flow95: ; preds = %Flow76, %.preheader.i
+ %513 = phi float [ %512, %Flow76 ], [ undef, %.preheader.i ]
+ %514 = phi i64 [ %505, %Flow76 ], [ undef, %.preheader.i ]
+ %515 = phi i1 [ true, %Flow76 ], [ %68, %.preheader.i ]
+ call void @llvm.amdgcn.end.cf.i64(i64 %497)
+ br label %Flow94, !amdgpu.uniform !5
+
+ .sink.split.i: ; preds = %Flow93
+ %516 = getelementptr inbounds float, ptr addrspace(1) %55, i64 %260
+ store float %259, ptr addrspace(1) %516, align 4, !tbaa !13
+ br label %Flow96, !amdgpu.uniform !5
+
+ Flow96: ; preds = %.sink.split.i, %Flow93
+ call void @llvm.amdgcn.end.cf.i64(i64 %264)
+ br label %Flow97, !amdgpu.uniform !5
+
+ Flow97: ; preds = %Flow96, %44
+ br label %_Z25rocblas_gemvn_kernel_calcILi64ELi16EiffLi0EEviiT3_PKT2_T1_S3_iS0_PS1_i.exit, !amdgpu.uniform !5
+
+ _Z25rocblas_gemvn_kernel_calcILi64ELi16EiffLi0EEviiT3_PKT2_T1_S3_iS0_PS1_i.exit: ; preds = %18, %Flow97
+ ret void
+ }
+
+ declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
+
+ declare void @llvm.amdgcn.s.barrier() #2
+
+ declare i32 @llvm.amdgcn.workgroup.id.x() #1
+
+ declare i32 @llvm.amdgcn.workgroup.id.y() #1
+
+ declare i32 @llvm.amdgcn.workgroup.id.z() #1
+
+ declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+ declare i32 @llvm.amdgcn.workitem.id.y() #1
+
+ declare i32 @llvm.amdgcn.mul.u24(i32, i32) #3
+
+ declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #3
+
+ declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #4
+
+ declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #4
+
+ declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #5
+
+ declare i1 @llvm.amdgcn.loop.i64(i64) #4
+
+ declare void @llvm.amdgcn.end.cf.i64(i64) #4
+
+ attributes #0 = { nofree nounwind "amdgpu-lds-size"="16384" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-wave-limiter"="true" "uniform-work-group-size"="false" }
+ attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+ attributes #2 = { convergent mustprogress nocallback nofree nounwind willreturn }
+ attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+ attributes #4 = { convergent nocallback nofree nounwind willreturn }
+ attributes #5 = { convergent nocallback nofree nounwind willreturn memory(none) }
+
+ !llvm.module.flags = !{!1, !2}
+ !opencl.ocl.version = !{!3}
+ !llvm.ident = !{!4}
+
+ !0 = !{i32 0, i32 1}
+ !1 = !{i32 1, !"wchar_size", i32 4}
+ !2 = !{i32 8, !"PIC Level", i32 1}
+ !3 = !{i32 2, i32 0}
+ !4 = !{!"AMD clang version 16.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-5.6.0 23223 3403c07804e79cd94b1efdf9f8d6cd45ac127382)"}
+ !5 = !{}
+ !6 = !{!7, !7, i64 0}
+ !7 = !{!"int", !8, i64 0}
+ !8 = !{!"omnipotent char", !9, i64 0}
+ !9 = !{!"Simple C/C++ TBAA"}
+ !10 = !{!11, !11, i64 0}
+ !11 = !{!"short", !8, i64 0}
+ !12 = !{i32 0, i32 1024}
+ !13 = !{!14, !14, i64 0}
+ !14 = !{!"float", !15, i64 0}
+ !15 = !{!"omnipotent char", !16, i64 0}
+ !16 = !{!"Simple C++ TBAA"}
+
+...
+---
+name: _ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: sreg_32, preferred-register: '' }
+ - { id: 1, class: sreg_32, preferred-register: '' }
+ - { id: 2, class: sreg_32, preferred-register: '' }
+ - { id: 3, class: sreg_64, preferred-register: '' }
+ - { id: 4, class: sgpr_96, preferred-register: '' }
+ - { id: 5, class: sgpr_32, preferred-register: '' }
+ - { id: 6, class: sgpr_32, preferred-register: '' }
+ - { id: 7, class: sreg_32, preferred-register: '' }
+ - { id: 8, class: sreg_32, preferred-register: '' }
+ - { id: 9, class: sreg_64, preferred-register: '' }
+ - { id: 10, class: sreg_64, preferred-register: '' }
+ - { id: 11, class: vgpr_32, preferred-register: '' }
+ - { id: 12, class: vgpr_32, preferred-register: '' }
+ - { id: 13, class: vgpr_32, preferred-register: '' }
+ - { id: 14, class: sreg_64, preferred-register: '' }
+ - { id: 15, class: vgpr_32, preferred-register: '' }
+ - { id: 16, class: vreg_64_align2, preferred-register: '' }
+ - { id: 17, class: sreg_64, preferred-register: '' }
+ - { id: 18, class: sreg_64, preferred-register: '' }
+ - { id: 19, class: vgpr_32, preferred-register: '' }
+ - { id: 20, class: sreg_64, preferred-register: '' }
+ - { id: 21, class: vgpr_32, preferred-register: '' }
+ - { id: 22, class: vreg_64_align2, preferred-register: '' }
+ - { id: 23, class: sreg_64, preferred-register: '' }
+ - { id: 24, class: vreg_64_align2, preferred-register: '' }
+ - { id: 25, class: vgpr_32, preferred-register: '' }
+ - { id: 26, class: vgpr_32, preferred-register: '' }
+ - { id: 27, class: vreg_64_align2, preferred-register: '' }
+ - { id: 28, class: sreg_64, preferred-register: '' }
+ - { id: 29, class: sreg_64, preferred-register: '' }
+ - { id: 30, class: sreg_64, preferred-register: '' }
+ - { id: 31, class: sreg_64, preferred-register: '' }
+ - { id: 32, class: sreg_64, preferred-register: '' }
+ - { id: 33, class: sreg_64, preferred-register: '' }
+ - { id: 34, class: sreg_32, preferred-register: '' }
+ - { id: 35, class: sreg_32, preferred-register: '' }
+ - { id: 36, class: sreg_32, preferred-register: '' }
+ - { id: 37, class: sreg_32, preferred-register: '' }
+ - { id: 38, class: vgpr_32, preferred-register: '' }
+ - { id: 39, class: sreg_32, preferred-register: '' }
+ - { id: 40, class: vgpr_32, preferred-register: '' }
+ - { id: 41, class: sreg_32, preferred-register: '' }
+ - { id: 42, class: sreg_64, preferred-register: '' }
+ - { id: 43, class: sreg_64, preferred-register: '' }
+ - { id: 44, class: sreg_64, preferred-register: '' }
+ - { id: 45, class: sreg_64, preferred-register: '' }
+ - { id: 46, class: sreg_64, preferred-register: '' }
+ - { id: 47, class: vgpr_32, preferred-register: '' }
+ - { id: 48, class: sreg_32, preferred-register: '' }
+ - { id: 49, class: vgpr_32, preferred-register: '' }
+ - { id: 50, class: vgpr_32, preferred-register: '' }
+ - { id: 51, class: vgpr_32, preferred-register: '' }
+ - { id: 52, class: vgpr_32, preferred-register: '' }
+ - { id: 53, class: sreg_32, preferred-register: '' }
+ - { id: 54, class: vgpr_32, preferred-register: '' }
+ - { id: 55, class: vgpr_32, preferred-register: '' }
+ - { id: 56, class: vreg_64_align2, preferred-register: '' }
+ - { id: 57, class: sreg_64, preferred-register: '' }
+ - { id: 58, class: vgpr_32, preferred-register: '' }
+ - { id: 59, class: vgpr_32, preferred-register: '' }
+ - { id: 60, class: vgpr_32, preferred-register: '' }
+ - { id: 61, class: vgpr_32, preferred-register: '' }
+ - { id: 62, class: vgpr_32, preferred-register: '' }
+ - { id: 63, class: sreg_64, preferred-register: '' }
+ - { id: 64, class: vreg_64_align2, preferred-register: '' }
+ - { id: 65, class: sreg_32, preferred-register: '' }
+ - { id: 66, class: vgpr_32, preferred-register: '' }
+ - { id: 67, class: vgpr_32, preferred-register: '' }
+ - { id: 68, class: vgpr_32, preferred-register: '' }
+ - { id: 69, class: vgpr_32, preferred-register: '' }
+ - { id: 70, class: vgpr_32, preferred-register: '' }
+ - { id: 71, class: vgpr_32, preferred-register: '' }
+ - { id: 72, class: sreg_64, preferred-register: '' }
+ - { id: 73, class: vgpr_32, preferred-register: '' }
+ - { id: 74, class: vgpr_32, preferred-register: '' }
+ - { id: 75, class: vgpr_32, preferred-register: '' }
+ - { id: 76, class: vgpr_32, preferred-register: '' }
+ - { id: 77, class: vreg_64_align2, preferred-register: '' }
+ - { id: 78, class: vreg_64_align2, preferred-register: '' }
+ - { id: 79, class: vreg_64_align2, preferred-register: '' }
+ - { id: 80, class: vreg_64_align2, preferred-register: '' }
+ - { id: 81, class: vgpr_32, preferred-register: '' }
+ - { id: 82, class: sreg_64, preferred-register: '' }
+ - { id: 83, class: vgpr_32, preferred-register: '' }
+ - { id: 84, class: sreg_64, preferred-register: '' }
+ - { id: 85, class: vgpr_32, preferred-register: '' }
+ - { id: 86, class: sreg_64, preferred-register: '' }
+ - { id: 87, class: vgpr_32, preferred-register: '' }
+ - { id: 88, class: vgpr_32, preferred-register: '' }
+ - { id: 89, class: vgpr_32, preferred-register: '' }
+ - { id: 90, class: vgpr_32, preferred-register: '' }
+ - { id: 91, class: vgpr_32, preferred-register: '' }
+ - { id: 92, class: vgpr_32, preferred-register: '' }
+ - { id: 93, class: vgpr_32, preferred-register: '' }
+ - { id: 94, class: vgpr_32, preferred-register: '' }
+ - { id: 95, class: vgpr_32, preferred-register: '' }
+ - { id: 96, class: vgpr_32, preferred-register: '' }
+ - { id: 97, class: vgpr_32, preferred-register: '' }
+ - { id: 98, class: vgpr_32, preferred-register: '' }
+ - { id: 99, class: vgpr_32, preferred-register: '' }
+ - { id: 100, class: sreg_32, preferred-register: '' }
+ - { id: 101, class: vreg_64_align2, preferred-register: '' }
+ - { id: 102, class: sreg_64, preferred-register: '' }
+ - { id: 103, class: vgpr_32, preferred-register: '' }
+ - { id: 104, class: vgpr_32, preferred-register: '' }
+ - { id: 105, class: vgpr_32, preferred-register: '' }
+ - { id: 106, class: vgpr_32, preferred-register: '' }
+ - { id: 107, class: vgpr_32, preferred-register: '' }
+ - { id: 108, class: sreg_64, preferred-register: '' }
+ - { id: 109, class: vgpr_32, preferred-register: '' }
+ - { id: 110, class: vreg_64_align2, preferred-register: '' }
+ - { id: 111, class: sreg_64, preferred-register: '' }
+ - { id: 112, class: sreg_64, preferred-register: '' }
+ - { id: 113, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 114, class: sreg_64, preferred-register: '' }
+ - { id: 115, class: vgpr_32, preferred-register: '' }
+ - { id: 116, class: vgpr_32, preferred-register: '' }
+ - { id: 117, class: sreg_64, preferred-register: '' }
+ - { id: 118, class: vgpr_32, preferred-register: '' }
+ - { id: 119, class: vgpr_32, preferred-register: '' }
+ - { id: 120, class: sreg_64, preferred-register: '' }
+ - { id: 121, class: vgpr_32, preferred-register: '' }
+ - { id: 122, class: vgpr_32, preferred-register: '' }
+ - { id: 123, class: sreg_64, preferred-register: '' }
+ - { id: 124, class: vgpr_32, preferred-register: '' }
+ - { id: 125, class: vgpr_32, preferred-register: '' }
+ - { id: 126, class: vgpr_32, preferred-register: '' }
+ - { id: 127, class: vgpr_32, preferred-register: '' }
+ - { id: 128, class: vgpr_32, preferred-register: '' }
+ - { id: 129, class: vgpr_32, preferred-register: '' }
+ - { id: 130, class: vgpr_32, preferred-register: '' }
+ - { id: 131, class: vgpr_32, preferred-register: '' }
+ - { id: 132, class: vgpr_32, preferred-register: '' }
+ - { id: 133, class: vgpr_32, preferred-register: '' }
+ - { id: 134, class: vgpr_32, preferred-register: '' }
+ - { id: 135, class: vgpr_32, preferred-register: '' }
+ - { id: 136, class: vgpr_32, preferred-register: '' }
+ - { id: 137, class: vgpr_32, preferred-register: '' }
+ - { id: 138, class: vgpr_32, preferred-register: '' }
+ - { id: 139, class: sreg_64, preferred-register: '' }
+ - { id: 140, class: vreg_64_align2, preferred-register: '' }
+ - { id: 141, class: vreg_64_align2, preferred-register: '' }
+ - { id: 142, class: vreg_64_align2, preferred-register: '' }
+ - { id: 143, class: vreg_64_align2, preferred-register: '' }
+ - { id: 144, class: vgpr_32, preferred-register: '' }
+ - { id: 145, class: sreg_64, preferred-register: '' }
+ - { id: 146, class: vgpr_32, preferred-register: '' }
+ - { id: 147, class: sreg_64, preferred-register: '' }
+ - { id: 148, class: vgpr_32, preferred-register: '' }
+ - { id: 149, class: sreg_64, preferred-register: '' }
+ - { id: 150, class: vgpr_32, preferred-register: '' }
+ - { id: 151, class: vgpr_32, preferred-register: '' }
+ - { id: 152, class: vgpr_32, preferred-register: '' }
+ - { id: 153, class: vgpr_32, preferred-register: '' }
+ - { id: 154, class: vgpr_32, preferred-register: '' }
+ - { id: 155, class: vgpr_32, preferred-register: '' }
+ - { id: 156, class: vgpr_32, preferred-register: '' }
+ - { id: 157, class: vgpr_32, preferred-register: '' }
+ - { id: 158, class: vgpr_32, preferred-register: '' }
+ - { id: 159, class: vgpr_32, preferred-register: '' }
+ - { id: 160, class: vgpr_32, preferred-register: '' }
+ - { id: 161, class: sreg_64, preferred-register: '' }
+ - { id: 162, class: vgpr_32, preferred-register: '' }
+ - { id: 163, class: vgpr_32, preferred-register: '' }
+ - { id: 164, class: sreg_64, preferred-register: '' }
+ - { id: 165, class: vgpr_32, preferred-register: '' }
+ - { id: 166, class: vreg_64_align2, preferred-register: '' }
+ - { id: 167, class: sreg_64, preferred-register: '' }
+ - { id: 168, class: vgpr_32, preferred-register: '' }
+ - { id: 169, class: vreg_64_align2, preferred-register: '' }
+ - { id: 170, class: vgpr_32, preferred-register: '' }
+ - { id: 171, class: vgpr_32, preferred-register: '' }
+ - { id: 172, class: vgpr_32, preferred-register: '' }
+ - { id: 173, class: vgpr_32, preferred-register: '' }
+ - { id: 174, class: vreg_64_align2, preferred-register: '' }
+ - { id: 175, class: sreg_64, preferred-register: '' }
+ - { id: 176, class: vgpr_32, preferred-register: '' }
+ - { id: 177, class: sgpr_128, preferred-register: '' }
+ - { id: 178, class: sgpr_64, preferred-register: '' }
+ - { id: 179, class: sgpr_32, preferred-register: '' }
+ - { id: 180, class: sgpr_32, preferred-register: '' }
+ - { id: 181, class: sgpr_32, preferred-register: '' }
+ - { id: 182, class: sgpr_32, preferred-register: '' }
+ - { id: 183, class: sreg_32, preferred-register: '' }
+ - { id: 184, class: sreg_32, preferred-register: '' }
+ - { id: 185, class: sreg_64, preferred-register: '' }
+ - { id: 186, class: sreg_64, preferred-register: '' }
+ - { id: 187, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 188, class: sreg_32_xm0_xexec, preferred-register: '' }
+ - { id: 189, class: sreg_32, preferred-register: '' }
+ - { id: 190, class: sreg_32, preferred-register: '' }
+ - { id: 191, class: sreg_32, preferred-register: '' }
+ - { id: 192, class: sreg_32, preferred-register: '' }
+ - { id: 193, class: sreg_32, preferred-register: '' }
+ - { id: 194, class: sreg_64, preferred-register: '' }
+ - { id: 195, class: sreg_64, preferred-register: '' }
+ - { id: 196, class: vgpr_32, preferred-register: '' }
+ - { id: 197, class: vgpr_32, preferred-register: '' }
+ - { id: 198, class: sreg_32, preferred-register: '' }
+ - { id: 199, class: sreg_32, preferred-register: '' }
+ - { id: 200, class: sreg_32, preferred-register: '' }
+ - { id: 201, class: sreg_64, preferred-register: '' }
+ - { id: 202, class: sreg_64, preferred-register: '' }
+ - { id: 203, class: vgpr_32, preferred-register: '' }
+ - { id: 204, class: sreg_32, preferred-register: '' }
+ - { id: 205, class: sreg_32, preferred-register: '' }
+ - { id: 206, class: sreg_32, preferred-register: '' }
+ - { id: 207, class: sreg_32, preferred-register: '' }
+ - { id: 208, class: sreg_32, preferred-register: '' }
+ - { id: 209, class: sreg_32, preferred-register: '' }
+ - { id: 210, class: sreg_64, preferred-register: '' }
+ - { id: 211, class: sreg_64, preferred-register: '' }
+ - { id: 212, class: vgpr_32, preferred-register: '' }
+ - { id: 213, class: sreg_32, preferred-register: '' }
+ - { id: 214, class: sreg_32, preferred-register: '' }
+ - { id: 215, class: sreg_32, preferred-register: '' }
+ - { id: 216, class: sgpr_128, preferred-register: '' }
+ - { id: 217, class: sreg_32, preferred-register: '' }
+ - { id: 218, class: sreg_32, preferred-register: '' }
+ - { id: 219, class: sgpr_96, preferred-register: '' }
+ - { id: 220, class: sgpr_32, preferred-register: '' }
+ - { id: 221, class: sgpr_32, preferred-register: '' }
+ - { id: 222, class: sreg_64, preferred-register: '' }
+ - { id: 223, class: vgpr_32, preferred-register: '' }
+ - { id: 224, class: sgpr_32, preferred-register: '' }
+ - { id: 225, class: sreg_64, preferred-register: '' }
+ - { id: 226, class: vgpr_32, preferred-register: '' }
+ - { id: 227, class: sreg_64, preferred-register: '' }
+ - { id: 228, class: sreg_64, preferred-register: '' }
+ - { id: 229, class: sgpr_32, preferred-register: '' }
+ - { id: 230, class: sreg_64, preferred-register: '' }
+ - { id: 231, class: sreg_64, preferred-register: '' }
+ - { id: 232, class: sreg_64, preferred-register: '' }
+ - { id: 233, class: sgpr_128, preferred-register: '' }
+ - { id: 234, class: sreg_32_xm0_xexec, preferred-register: '' }
+ - { id: 235, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 236, class: sreg_32, preferred-register: '' }
+ - { id: 237, class: sreg_32, preferred-register: '' }
+ - { id: 238, class: sreg_64, preferred-register: '' }
+ - { id: 239, class: sreg_32, preferred-register: '' }
+ - { id: 240, class: sreg_32, preferred-register: '' }
+ - { id: 241, class: sreg_64, preferred-register: '' }
+ - { id: 242, class: sreg_32, preferred-register: '' }
+ - { id: 243, class: sreg_32_xm0, preferred-register: '' }
+ - { id: 244, class: sreg_32_xm0, preferred-register: '' }
+ - { id: 245, class: sreg_64, preferred-register: '' }
+ - { id: 246, class: sreg_32, preferred-register: '' }
+ - { id: 247, class: sreg_32, preferred-register: '' }
+ - { id: 248, class: sreg_32, preferred-register: '' }
+ - { id: 249, class: sreg_32, preferred-register: '' }
+ - { id: 250, class: sreg_32, preferred-register: '' }
+ - { id: 251, class: sreg_32, preferred-register: '' }
+ - { id: 252, class: sreg_32, preferred-register: '' }
+ - { id: 253, class: sreg_32, preferred-register: '' }
+ - { id: 254, class: sreg_64, preferred-register: '' }
+ - { id: 255, class: sreg_32, preferred-register: '' }
+ - { id: 256, class: sreg_64, preferred-register: '' }
+ - { id: 257, class: sreg_64, preferred-register: '' }
+ - { id: 258, class: sreg_64, preferred-register: '' }
+ - { id: 259, class: sreg_32, preferred-register: '' }
+ - { id: 260, class: sgpr_32, preferred-register: '' }
+ - { id: 261, class: sreg_64, preferred-register: '' }
+ - { id: 262, class: vgpr_32, preferred-register: '' }
+ - { id: 263, class: sreg_64, preferred-register: '' }
+ - { id: 264, class: sgpr_32, preferred-register: '' }
+ - { id: 265, class: sreg_64, preferred-register: '' }
+ - { id: 266, class: sreg_64, preferred-register: '' }
+ - { id: 267, class: sreg_32, preferred-register: '' }
+ - { id: 268, class: sreg_64, preferred-register: '' }
+ - { id: 269, class: sgpr_32, preferred-register: '' }
+ - { id: 270, class: sreg_64, preferred-register: '' }
+ - { id: 271, class: sreg_64, preferred-register: '' }
+ - { id: 272, class: sreg_32, preferred-register: '' }
+ - { id: 273, class: vgpr_32, preferred-register: '' }
+ - { id: 274, class: sreg_64, preferred-register: '' }
+ - { id: 275, class: sgpr_32, preferred-register: '' }
+ - { id: 276, class: sreg_64, preferred-register: '' }
+ - { id: 277, class: vgpr_32, preferred-register: '' }
+ - { id: 278, class: vgpr_32, preferred-register: '' }
+ - { id: 279, class: vgpr_32, preferred-register: '' }
+ - { id: 280, class: vgpr_32, preferred-register: '' }
+ - { id: 281, class: vreg_64_align2, preferred-register: '' }
+ - { id: 282, class: sreg_64, preferred-register: '' }
+ - { id: 283, class: sreg_32, preferred-register: '' }
+ - { id: 284, class: vreg_64_align2, preferred-register: '' }
+ - { id: 285, class: vreg_64_align2, preferred-register: '' }
+ - { id: 286, class: vgpr_32, preferred-register: '' }
+ - { id: 287, class: sreg_64, preferred-register: '' }
+ - { id: 288, class: sreg_64, preferred-register: '' }
+ - { id: 289, class: sreg_64, preferred-register: '' }
+ - { id: 290, class: sgpr_32, preferred-register: '' }
+ - { id: 291, class: sgpr_128, preferred-register: '' }
+ - { id: 292, class: sreg_32_xm0_xexec, preferred-register: '' }
+ - { id: 293, class: sgpr_128, preferred-register: '' }
+ - { id: 294, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 295, class: sreg_32_xm0_xexec, preferred-register: '' }
+ - { id: 296, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 297, class: sreg_32, preferred-register: '' }
+ - { id: 298, class: sreg_32, preferred-register: '' }
+ - { id: 299, class: sreg_64, preferred-register: '' }
+ - { id: 300, class: sreg_32, preferred-register: '' }
+ - { id: 301, class: sreg_32, preferred-register: '' }
+ - { id: 302, class: sreg_64, preferred-register: '' }
+ - { id: 303, class: sreg_32, preferred-register: '' }
+ - { id: 304, class: sreg_32, preferred-register: '' }
+ - { id: 305, class: sreg_32, preferred-register: '' }
+ - { id: 306, class: sreg_32, preferred-register: '' }
+ - { id: 307, class: sreg_32, preferred-register: '' }
+ - { id: 308, class: sreg_32, preferred-register: '' }
+ - { id: 309, class: sreg_64, preferred-register: '' }
+ - { id: 310, class: sreg_64, preferred-register: '' }
+ - { id: 311, class: sreg_32, preferred-register: '' }
+ - { id: 312, class: sreg_32, preferred-register: '' }
+ - { id: 313, class: sreg_32, preferred-register: '' }
+ - { id: 314, class: sreg_32, preferred-register: '' }
+ - { id: 315, class: sreg_32, preferred-register: '' }
+ - { id: 316, class: sreg_32, preferred-register: '' }
+ - { id: 317, class: sreg_32, preferred-register: '' }
+ - { id: 318, class: sreg_32, preferred-register: '' }
+ - { id: 319, class: sreg_32, preferred-register: '' }
+ - { id: 320, class: sreg_64, preferred-register: '' }
+ - { id: 321, class: sreg_32, preferred-register: '' }
+ - { id: 322, class: sreg_64, preferred-register: '' }
+ - { id: 323, class: sreg_64, preferred-register: '' }
+ - { id: 324, class: sreg_64, preferred-register: '' }
+ - { id: 325, class: sreg_32, preferred-register: '' }
+ - { id: 326, class: sreg_32, preferred-register: '' }
+ - { id: 327, class: sreg_32, preferred-register: '' }
+ - { id: 328, class: sreg_32, preferred-register: '' }
+ - { id: 329, class: sreg_32, preferred-register: '' }
+ - { id: 330, class: sreg_32, preferred-register: '' }
+ - { id: 331, class: sreg_32, preferred-register: '' }
+ - { id: 332, class: sreg_64, preferred-register: '' }
+ - { id: 333, class: sreg_64, preferred-register: '' }
+ - { id: 334, class: sreg_64, preferred-register: '' }
+ - { id: 335, class: sreg_64, preferred-register: '' }
+ - { id: 336, class: sreg_32, preferred-register: '' }
+ - { id: 337, class: sreg_32, preferred-register: '' }
+ - { id: 338, class: sreg_32, preferred-register: '' }
+ - { id: 339, class: sreg_32, preferred-register: '' }
+ - { id: 340, class: sreg_32, preferred-register: '' }
+ - { id: 341, class: sreg_32, preferred-register: '' }
+ - { id: 342, class: sreg_32, preferred-register: '' }
+ - { id: 343, class: sreg_64, preferred-register: '' }
+ - { id: 344, class: sreg_64, preferred-register: '' }
+ - { id: 345, class: sreg_32, preferred-register: '' }
+ - { id: 346, class: sgpr_32, preferred-register: '' }
+ - { id: 347, class: sreg_64, preferred-register: '' }
+ - { id: 348, class: sreg_32, preferred-register: '' }
+ - { id: 349, class: vgpr_32, preferred-register: '' }
+ - { id: 350, class: sreg_64, preferred-register: '' }
+ - { id: 351, class: sreg_32, preferred-register: '' }
+ - { id: 352, class: vgpr_32, preferred-register: '' }
+ - { id: 353, class: sreg_64, preferred-register: '' }
+ - { id: 354, class: sreg_32, preferred-register: '' }
+ - { id: 355, class: vgpr_32, preferred-register: '' }
+ - { id: 356, class: sreg_64, preferred-register: '' }
+ - { id: 357, class: vgpr_32, preferred-register: '' }
+ - { id: 358, class: sreg_32, preferred-register: '' }
+ - { id: 359, class: sreg_32, preferred-register: '' }
+ - { id: 360, class: vgpr_32, preferred-register: '' }
+ - { id: 361, class: sreg_32, preferred-register: '' }
+ - { id: 362, class: vgpr_32, preferred-register: '' }
+ - { id: 363, class: vgpr_32, preferred-register: '' }
+ - { id: 364, class: vgpr_32, preferred-register: '' }
+ - { id: 365, class: sreg_64, preferred-register: '' }
+ - { id: 366, class: sreg_64, preferred-register: '' }
+ - { id: 367, class: sreg_64, preferred-register: '' }
+ - { id: 368, class: vgpr_32, preferred-register: '' }
+ - { id: 369, class: vgpr_32, preferred-register: '' }
+ - { id: 370, class: vgpr_32, preferred-register: '' }
+ - { id: 371, class: vgpr_32, preferred-register: '' }
+ - { id: 372, class: vreg_64_align2, preferred-register: '' }
+ - { id: 373, class: vreg_64_align2, preferred-register: '' }
+ - { id: 374, class: sreg_64, preferred-register: '' }
+ - { id: 375, class: sreg_32_xm0, preferred-register: '' }
+ - { id: 376, class: sreg_32_xm0, preferred-register: '' }
+ - { id: 377, class: sreg_64, preferred-register: '' }
+ - { id: 378, class: sreg_64, preferred-register: '' }
+ - { id: 379, class: vgpr_32, preferred-register: '' }
+ - { id: 380, class: vgpr_32, preferred-register: '' }
+ - { id: 381, class: vgpr_32, preferred-register: '' }
+ - { id: 382, class: vreg_64_align2, preferred-register: '' }
+ - { id: 383, class: sreg_32, preferred-register: '' }
+ - { id: 384, class: vreg_64_align2, preferred-register: '' }
+ - { id: 385, class: vreg_64_align2, preferred-register: '' }
+ - { id: 386, class: vgpr_32, preferred-register: '' }
+ - { id: 387, class: vgpr_32, preferred-register: '' }
+ - { id: 388, class: vgpr_32, preferred-register: '' }
+ - { id: 389, class: vreg_64_align2, preferred-register: '' }
+ - { id: 390, class: vreg_64_align2, preferred-register: '' }
+ - { id: 391, class: vreg_64_align2, preferred-register: '' }
+ - { id: 392, class: vgpr_32, preferred-register: '' }
+ - { id: 393, class: vgpr_32, preferred-register: '' }
+ - { id: 394, class: vgpr_32, preferred-register: '' }
+ - { id: 395, class: vreg_64_align2, preferred-register: '' }
+ - { id: 396, class: vreg_64_align2, preferred-register: '' }
+ - { id: 397, class: vreg_64_align2, preferred-register: '' }
+ - { id: 398, class: vgpr_32, preferred-register: '' }
+ - { id: 399, class: vgpr_32, preferred-register: '' }
+ - { id: 400, class: vgpr_32, preferred-register: '' }
+ - { id: 401, class: vreg_64_align2, preferred-register: '' }
+ - { id: 402, class: vreg_64_align2, preferred-register: '' }
+ - { id: 403, class: vgpr_32, preferred-register: '' }
+ - { id: 404, class: vgpr_32, preferred-register: '' }
+ - { id: 405, class: vgpr_32, preferred-register: '' }
+ - { id: 406, class: vgpr_32, preferred-register: '' }
+ - { id: 407, class: vgpr_32, preferred-register: '' }
+ - { id: 408, class: vreg_64_align2, preferred-register: '' }
+ - { id: 409, class: vreg_64_align2, preferred-register: '' }
+ - { id: 410, class: vgpr_32, preferred-register: '' }
+ - { id: 411, class: vgpr_32, preferred-register: '' }
+ - { id: 412, class: vgpr_32, preferred-register: '' }
+ - { id: 413, class: vgpr_32, preferred-register: '' }
+ - { id: 414, class: vgpr_32, preferred-register: '' }
+ - { id: 415, class: vreg_64_align2, preferred-register: '' }
+ - { id: 416, class: vreg_64_align2, preferred-register: '' }
+ - { id: 417, class: vgpr_32, preferred-register: '' }
+ - { id: 418, class: vgpr_32, preferred-register: '' }
+ - { id: 419, class: vgpr_32, preferred-register: '' }
+ - { id: 420, class: vgpr_32, preferred-register: '' }
+ - { id: 421, class: vgpr_32, preferred-register: '' }
+ - { id: 422, class: vreg_64_align2, preferred-register: '' }
+ - { id: 423, class: vreg_64_align2, preferred-register: '' }
+ - { id: 424, class: vgpr_32, preferred-register: '' }
+ - { id: 425, class: sreg_64, preferred-register: '' }
+ - { id: 426, class: vgpr_32, preferred-register: '' }
+ - { id: 427, class: vgpr_32, preferred-register: '' }
+ - { id: 428, class: vgpr_32, preferred-register: '' }
+ - { id: 429, class: vgpr_32, preferred-register: '' }
+ - { id: 430, class: vgpr_32, preferred-register: '' }
+ - { id: 431, class: vgpr_32, preferred-register: '' }
+ - { id: 432, class: vgpr_32, preferred-register: '' }
+ - { id: 433, class: sreg_64, preferred-register: '' }
+ - { id: 434, class: vgpr_32, preferred-register: '' }
+ - { id: 435, class: vgpr_32, preferred-register: '' }
+ - { id: 436, class: vgpr_32, preferred-register: '' }
+ - { id: 437, class: vgpr_32, preferred-register: '' }
+ - { id: 438, class: vgpr_32, preferred-register: '' }
+ - { id: 439, class: vgpr_32, preferred-register: '' }
+ - { id: 440, class: vgpr_32, preferred-register: '' }
+ - { id: 441, class: sreg_64, preferred-register: '' }
+ - { id: 442, class: vgpr_32, preferred-register: '' }
+ - { id: 443, class: vgpr_32, preferred-register: '' }
+ - { id: 444, class: vgpr_32, preferred-register: '' }
+ - { id: 445, class: vgpr_32, preferred-register: '' }
+ - { id: 446, class: vgpr_32, preferred-register: '' }
+ - { id: 447, class: vgpr_32, preferred-register: '' }
+ - { id: 448, class: vgpr_32, preferred-register: '' }
+ - { id: 449, class: sreg_32, preferred-register: '' }
+ - { id: 450, class: sreg_64, preferred-register: '' }
+ - { id: 451, class: sreg_32, preferred-register: '' }
+ - { id: 452, class: sgpr_32, preferred-register: '' }
+ - { id: 453, class: sreg_64, preferred-register: '' }
+ - { id: 454, class: sgpr_32, preferred-register: '' }
+ - { id: 455, class: vgpr_32, preferred-register: '' }
+ - { id: 456, class: vgpr_32, preferred-register: '' }
+ - { id: 457, class: vgpr_32, preferred-register: '' }
+ - { id: 458, class: vreg_64_align2, preferred-register: '' }
+ - { id: 459, class: sreg_32, preferred-register: '' }
+ - { id: 460, class: vreg_64_align2, preferred-register: '' }
+ - { id: 461, class: vreg_64_align2, preferred-register: '' }
+ - { id: 462, class: sreg_32, preferred-register: '' }
+ - { id: 463, class: sreg_64, preferred-register: '' }
+ - { id: 464, class: sgpr_32, preferred-register: '' }
+ - { id: 465, class: vgpr_32, preferred-register: '' }
+ - { id: 466, class: vgpr_32, preferred-register: '' }
+ - { id: 467, class: vgpr_32, preferred-register: '' }
+ - { id: 468, class: vreg_64_align2, preferred-register: '' }
+ - { id: 469, class: sreg_32, preferred-register: '' }
+ - { id: 470, class: vreg_64_align2, preferred-register: '' }
+ - { id: 471, class: vreg_64_align2, preferred-register: '' }
+ - { id: 472, class: sreg_64, preferred-register: '' }
+ - { id: 473, class: sgpr_32, preferred-register: '' }
+ - { id: 474, class: vgpr_32, preferred-register: '' }
+ - { id: 475, class: vgpr_32, preferred-register: '' }
+ - { id: 476, class: vgpr_32, preferred-register: '' }
+ - { id: 477, class: vreg_64_align2, preferred-register: '' }
+ - { id: 478, class: sreg_32, preferred-register: '' }
+ - { id: 479, class: vreg_64_align2, preferred-register: '' }
+ - { id: 480, class: vreg_64_align2, preferred-register: '' }
+ - { id: 481, class: sreg_32, preferred-register: '' }
+ - { id: 482, class: sreg_64, preferred-register: '' }
+ - { id: 483, class: vgpr_32, preferred-register: '' }
+ - { id: 484, class: vgpr_32, preferred-register: '' }
+ - { id: 485, class: vgpr_32, preferred-register: '' }
+ - { id: 486, class: vreg_64_align2, preferred-register: '' }
+ - { id: 487, class: sreg_32, preferred-register: '' }
+ - { id: 488, class: vreg_64_align2, preferred-register: '' }
+ - { id: 489, class: vreg_64_align2, preferred-register: '' }
+ - { id: 490, class: sreg_64, preferred-register: '' }
+ - { id: 491, class: vgpr_32, preferred-register: '' }
+ - { id: 492, class: sreg_32, preferred-register: '' }
+ - { id: 493, class: vgpr_32, preferred-register: '' }
+ - { id: 494, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 495, class: vgpr_32, preferred-register: '' }
+ - { id: 496, class: vgpr_32, preferred-register: '' }
+ - { id: 497, class: vgpr_32, preferred-register: '' }
+ - { id: 498, class: vgpr_32, preferred-register: '' }
+ - { id: 499, class: vreg_64_align2, preferred-register: '' }
+ - { id: 500, class: sreg_32, preferred-register: '' }
+ - { id: 501, class: vreg_64_align2, preferred-register: '' }
+ - { id: 502, class: vgpr_32, preferred-register: '' }
+ - { id: 503, class: vgpr_32, preferred-register: '' }
+ - { id: 504, class: sreg_32, preferred-register: '' }
+ - { id: 505, class: vgpr_32, preferred-register: '' }
+ - { id: 506, class: vgpr_32, preferred-register: '' }
+ - { id: 507, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 508, class: vgpr_32, preferred-register: '' }
+ - { id: 509, class: vgpr_32, preferred-register: '' }
+ - { id: 510, class: vgpr_32, preferred-register: '' }
+ - { id: 511, class: vgpr_32, preferred-register: '' }
+ - { id: 512, class: vgpr_32, preferred-register: '' }
+ - { id: 513, class: vreg_64_align2, preferred-register: '' }
+ - { id: 514, class: vreg_64_align2, preferred-register: '' }
+ - { id: 515, class: vgpr_32, preferred-register: '' }
+ - { id: 516, class: vgpr_32, preferred-register: '' }
+ - { id: 517, class: vgpr_32, preferred-register: '' }
+ - { id: 518, class: vgpr_32, preferred-register: '' }
+ - { id: 519, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 520, class: vgpr_32, preferred-register: '' }
+ - { id: 521, class: vgpr_32, preferred-register: '' }
+ - { id: 522, class: vgpr_32, preferred-register: '' }
+ - { id: 523, class: vgpr_32, preferred-register: '' }
+ - { id: 524, class: vgpr_32, preferred-register: '' }
+ - { id: 525, class: vreg_64_align2, preferred-register: '' }
+ - { id: 526, class: vreg_64_align2, preferred-register: '' }
+ - { id: 527, class: vgpr_32, preferred-register: '' }
+ - { id: 528, class: vgpr_32, preferred-register: '' }
+ - { id: 529, class: sreg_32, preferred-register: '' }
+ - { id: 530, class: vgpr_32, preferred-register: '' }
+ - { id: 531, class: vgpr_32, preferred-register: '' }
+ - { id: 532, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 533, class: vgpr_32, preferred-register: '' }
+ - { id: 534, class: vgpr_32, preferred-register: '' }
+ - { id: 535, class: vgpr_32, preferred-register: '' }
+ - { id: 536, class: vgpr_32, preferred-register: '' }
+ - { id: 537, class: vgpr_32, preferred-register: '' }
+ - { id: 538, class: vreg_64_align2, preferred-register: '' }
+ - { id: 539, class: vreg_64_align2, preferred-register: '' }
+ - { id: 540, class: vgpr_32, preferred-register: '' }
+ - { id: 541, class: sreg_32, preferred-register: '' }
+ - { id: 542, class: vgpr_32, preferred-register: '' }
+ - { id: 543, class: sreg_64, preferred-register: '' }
+ - { id: 544, class: vgpr_32, preferred-register: '' }
+ - { id: 545, class: vgpr_32, preferred-register: '' }
+ - { id: 546, class: vgpr_32, preferred-register: '' }
+ - { id: 547, class: vgpr_32, preferred-register: '' }
+ - { id: 548, class: vgpr_32, preferred-register: '' }
+ - { id: 549, class: vgpr_32, preferred-register: '' }
+ - { id: 550, class: vgpr_32, preferred-register: '' }
+ - { id: 551, class: sreg_32, preferred-register: '' }
+ - { id: 552, class: vgpr_32, preferred-register: '' }
+ - { id: 553, class: sreg_64, preferred-register: '' }
+ - { id: 554, class: vgpr_32, preferred-register: '' }
+ - { id: 555, class: vgpr_32, preferred-register: '' }
+ - { id: 556, class: vgpr_32, preferred-register: '' }
+ - { id: 557, class: vgpr_32, preferred-register: '' }
+ - { id: 558, class: vgpr_32, preferred-register: '' }
+ - { id: 559, class: vgpr_32, preferred-register: '' }
+ - { id: 560, class: vgpr_32, preferred-register: '' }
+ - { id: 561, class: sreg_32, preferred-register: '' }
+ - { id: 562, class: vgpr_32, preferred-register: '' }
+ - { id: 563, class: sreg_64, preferred-register: '' }
+ - { id: 564, class: vgpr_32, preferred-register: '' }
+ - { id: 565, class: vgpr_32, preferred-register: '' }
+ - { id: 566, class: vgpr_32, preferred-register: '' }
+ - { id: 567, class: vgpr_32, preferred-register: '' }
+ - { id: 568, class: vgpr_32, preferred-register: '' }
+ - { id: 569, class: vgpr_32, preferred-register: '' }
+ - { id: 570, class: vgpr_32, preferred-register: '' }
+ - { id: 571, class: sgpr_32, preferred-register: '' }
+ - { id: 572, class: sreg_64, preferred-register: '' }
+ - { id: 573, class: sreg_32, preferred-register: '' }
+ - { id: 574, class: vgpr_32, preferred-register: '' }
+ - { id: 575, class: sreg_32, preferred-register: '' }
+ - { id: 576, class: vgpr_32, preferred-register: '' }
+ - { id: 577, class: sreg_32, preferred-register: '' }
+ - { id: 578, class: sreg_64, preferred-register: '' }
+ - { id: 579, class: sgpr_32, preferred-register: '' }
+ - { id: 580, class: sreg_64, preferred-register: '' }
+ - { id: 581, class: sreg_32, preferred-register: '' }
+ - { id: 582, class: vgpr_32, preferred-register: '' }
+ - { id: 583, class: vgpr_32, preferred-register: '' }
+ - { id: 584, class: vgpr_32, preferred-register: '' }
+ - { id: 585, class: vgpr_32, preferred-register: '' }
+ - { id: 586, class: vgpr_32, preferred-register: '' }
+ - { id: 587, class: vgpr_32, preferred-register: '' }
+ - { id: 588, class: vgpr_32, preferred-register: '' }
+ - { id: 589, class: vgpr_32, preferred-register: '' }
+ - { id: 590, class: vgpr_32, preferred-register: '' }
+ - { id: 591, class: vgpr_32, preferred-register: '' }
+ - { id: 592, class: vgpr_32, preferred-register: '' }
+ - { id: 593, class: vgpr_32, preferred-register: '' }
+ - { id: 594, class: vgpr_32, preferred-register: '' }
+ - { id: 595, class: vgpr_32, preferred-register: '' }
+ - { id: 596, class: vgpr_32, preferred-register: '' }
+ - { id: 597, class: vgpr_32, preferred-register: '' }
+ - { id: 598, class: vgpr_32, preferred-register: '' }
+ - { id: 599, class: vgpr_32, preferred-register: '' }
+ - { id: 600, class: vgpr_32, preferred-register: '' }
+ - { id: 601, class: vgpr_32, preferred-register: '' }
+ - { id: 602, class: vgpr_32, preferred-register: '' }
+ - { id: 603, class: vgpr_32, preferred-register: '' }
+ - { id: 604, class: vgpr_32, preferred-register: '' }
+ - { id: 605, class: vgpr_32, preferred-register: '' }
+ - { id: 606, class: vgpr_32, preferred-register: '' }
+ - { id: 607, class: vgpr_32, preferred-register: '' }
+ - { id: 608, class: vgpr_32, preferred-register: '' }
+ - { id: 609, class: vgpr_32, preferred-register: '' }
+ - { id: 610, class: vgpr_32, preferred-register: '' }
+ - { id: 611, class: vgpr_32, preferred-register: '' }
+ - { id: 612, class: vgpr_32, preferred-register: '' }
+ - { id: 613, class: sreg_64, preferred-register: '' }
+ - { id: 614, class: sgpr_32, preferred-register: '' }
+ - { id: 615, class: sreg_64, preferred-register: '' }
+ - { id: 616, class: vgpr_32, preferred-register: '' }
+ - { id: 617, class: vgpr_32, preferred-register: '' }
+ - { id: 618, class: vgpr_32, preferred-register: '' }
+ - { id: 619, class: vgpr_32, preferred-register: '' }
+ - { id: 620, class: vreg_64_align2, preferred-register: '' }
+ - { id: 621, class: sreg_64, preferred-register: '' }
+ - { id: 622, class: sreg_32, preferred-register: '' }
+ - { id: 623, class: vreg_64_align2, preferred-register: '' }
+ - { id: 624, class: vreg_64_align2, preferred-register: '' }
+ - { id: 625, class: vgpr_32, preferred-register: '' }
+ - { id: 626, class: sreg_64, preferred-register: '' }
+ - { id: 627, class: sreg_64, preferred-register: '' }
+ - { id: 628, class: sreg_32, preferred-register: '' }
+ - { id: 629, class: vreg_64_align2, preferred-register: '' }
+ - { id: 630, class: vreg_64_align2, preferred-register: '' }
+ - { id: 631, class: vgpr_32, preferred-register: '' }
+ - { id: 632, class: vgpr_32, preferred-register: '' }
+ - { id: 633, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 634, class: vgpr_32, preferred-register: '' }
+ - { id: 635, class: vreg_64_align2, preferred-register: '' }
+ - { id: 636, class: vreg_1, preferred-register: '' }
+ - { id: 637, class: vgpr_32, preferred-register: '' }
+ - { id: 638, class: vreg_64_align2, preferred-register: '' }
+ - { id: 639, class: vreg_1, preferred-register: '' }
+ - { id: 640, class: vgpr_32, preferred-register: '' }
+ - { id: 641, class: vreg_64_align2, preferred-register: '' }
+ - { id: 642, class: vreg_1, preferred-register: '' }
+ - { id: 643, class: vreg_1, preferred-register: '' }
+ - { id: 644, class: vgpr_32, preferred-register: '' }
+ - { id: 645, class: vgpr_32, preferred-register: '' }
+ - { id: 646, class: vgpr_32, preferred-register: '' }
+ - { id: 647, class: vgpr_32, preferred-register: '' }
+ - { id: 648, class: vgpr_32, preferred-register: '' }
+ - { id: 649, class: vgpr_32, preferred-register: '' }
+ - { id: 650, class: vgpr_32, preferred-register: '' }
+ - { id: 651, class: vgpr_32, preferred-register: '' }
+ - { id: 652, class: vgpr_32, preferred-register: '' }
+ - { id: 653, class: vgpr_32, preferred-register: '' }
+ - { id: 654, class: vgpr_32, preferred-register: '' }
+ - { id: 655, class: vgpr_32, preferred-register: '' }
+ - { id: 656, class: vgpr_32, preferred-register: '' }
+ - { id: 657, class: vgpr_32, preferred-register: '' }
+ - { id: 658, class: vgpr_32, preferred-register: '' }
+ - { id: 659, class: vgpr_32, preferred-register: '' }
+ - { id: 660, class: vgpr_32, preferred-register: '' }
+ - { id: 661, class: vgpr_32, preferred-register: '' }
+ - { id: 662, class: vgpr_32, preferred-register: '' }
+ - { id: 663, class: vreg_64_align2, preferred-register: '' }
+ - { id: 664, class: vgpr_32, preferred-register: '' }
+ - { id: 665, class: vgpr_32, preferred-register: '' }
+ - { id: 666, class: vreg_64_align2, preferred-register: '' }
+ - { id: 667, class: vreg_1, preferred-register: '' }
+ - { id: 668, class: sreg_64, preferred-register: '' }
+ - { id: 669, class: sreg_64, preferred-register: '' }
+ - { id: 670, class: sreg_64, preferred-register: '' }
+ - { id: 671, class: sreg_64, preferred-register: '' }
+ - { id: 672, class: sreg_64, preferred-register: '' }
+ - { id: 673, class: sreg_64, preferred-register: '' }
+ - { id: 674, class: sreg_64, preferred-register: '' }
+ - { id: 675, class: sreg_64, preferred-register: '' }
+ - { id: 676, class: sreg_64, preferred-register: '' }
+ - { id: 677, class: sreg_64, preferred-register: '' }
+ - { id: 678, class: sreg_64, preferred-register: '' }
+ - { id: 679, class: sreg_64, preferred-register: '' }
+ - { id: 680, class: sreg_32, preferred-register: '' }
+ - { id: 681, class: sreg_32, preferred-register: '' }
+ - { id: 682, class: sreg_32, preferred-register: '' }
+ - { id: 683, class: sreg_32, preferred-register: '' }
+ - { id: 684, class: sreg_32, preferred-register: '' }
+ - { id: 685, class: sreg_32, preferred-register: '' }
+ - { id: 686, class: sreg_32, preferred-register: '' }
+ - { id: 687, class: sreg_32, preferred-register: '' }
+ - { id: 688, class: sreg_32, preferred-register: '' }
+ - { id: 689, class: sreg_32, preferred-register: '' }
+ - { id: 690, class: sreg_32, preferred-register: '' }
+ - { id: 691, class: sreg_32, preferred-register: '' }
+ - { id: 692, class: sreg_32, preferred-register: '' }
+ - { id: 693, class: sreg_32, preferred-register: '' }
+ - { id: 694, class: sreg_32, preferred-register: '' }
+ - { id: 695, class: sreg_32, preferred-register: '' }
+ - { id: 696, class: sreg_32, preferred-register: '' }
+ - { id: 697, class: sreg_32, preferred-register: '' }
+ - { id: 698, class: sreg_32, preferred-register: '' }
+ - { id: 699, class: sreg_32, preferred-register: '' }
+ - { id: 700, class: sreg_32, preferred-register: '' }
+ - { id: 701, class: sreg_32, preferred-register: '' }
+ - { id: 702, class: sreg_32, preferred-register: '' }
+ - { id: 703, class: sreg_32, preferred-register: '' }
+ - { id: 704, class: sreg_32, preferred-register: '' }
+ - { id: 705, class: sreg_32, preferred-register: '' }
+ - { id: 706, class: sreg_32, preferred-register: '' }
+ - { id: 707, class: sreg_32, preferred-register: '' }
+ - { id: 708, class: sreg_32, preferred-register: '' }
+ - { id: 709, class: sreg_32, preferred-register: '' }
+ - { id: 710, class: sreg_32, preferred-register: '' }
+ - { id: 711, class: sreg_32, preferred-register: '' }
+ - { id: 712, class: sreg_32, preferred-register: '' }
+ - { id: 713, class: sreg_32, preferred-register: '' }
+ - { id: 714, class: sreg_32, preferred-register: '' }
+ - { id: 715, class: sreg_32, preferred-register: '' }
+ - { id: 716, class: vgpr_32, preferred-register: '' }
+ - { id: 717, class: vgpr_32, preferred-register: '' }
+ - { id: 718, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 719, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 720, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 721, class: vgpr_32, preferred-register: '' }
+ - { id: 722, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 723, class: vgpr_32, preferred-register: '' }
+ - { id: 724, class: vgpr_32, preferred-register: '' }
+ - { id: 725, class: sreg_32, preferred-register: '' }
+ - { id: 726, class: sreg_32, preferred-register: '' }
+ - { id: 727, class: sreg_32, preferred-register: '' }
+ - { id: 728, class: sreg_32, preferred-register: '' }
+ - { id: 729, class: sreg_32, preferred-register: '' }
+ - { id: 730, class: sreg_32, preferred-register: '' }
+ - { id: 731, class: sreg_32, preferred-register: '' }
+ - { id: 732, class: sreg_32, preferred-register: '' }
+ - { id: 733, class: sreg_32, preferred-register: '' }
+ - { id: 734, class: sreg_32, preferred-register: '' }
+ - { id: 735, class: sreg_32, preferred-register: '' }
+ - { id: 736, class: sreg_32, preferred-register: '' }
+ - { id: 737, class: sreg_32, preferred-register: '' }
+ - { id: 738, class: sreg_32, preferred-register: '' }
+ - { id: 739, class: sreg_32, preferred-register: '' }
+ - { id: 740, class: sreg_32, preferred-register: '' }
+ - { id: 741, class: sreg_32, preferred-register: '' }
+ - { id: 742, class: sreg_32, preferred-register: '' }
+ - { id: 743, class: sreg_32, preferred-register: '' }
+ - { id: 744, class: sreg_32, preferred-register: '' }
+ - { id: 745, class: sreg_32, preferred-register: '' }
+ - { id: 746, class: sreg_32, preferred-register: '' }
+ - { id: 747, class: sreg_32, preferred-register: '' }
+ - { id: 748, class: sreg_32, preferred-register: '' }
+ - { id: 749, class: sreg_32, preferred-register: '' }
+ - { id: 750, class: sreg_32, preferred-register: '' }
+ - { id: 751, class: sreg_32, preferred-register: '' }
+ - { id: 752, class: sreg_32, preferred-register: '' }
+ - { id: 753, class: sreg_32, preferred-register: '' }
+ - { id: 754, class: sreg_32, preferred-register: '' }
+ - { id: 755, class: sreg_32, preferred-register: '' }
+ - { id: 756, class: sreg_32, preferred-register: '' }
+ - { id: 757, class: sreg_32, preferred-register: '' }
+ - { id: 758, class: sreg_32, preferred-register: '' }
+ - { id: 759, class: sreg_32, preferred-register: '' }
+ - { id: 760, class: sreg_32, preferred-register: '' }
+ - { id: 761, class: vgpr_32, preferred-register: '' }
+ - { id: 762, class: vgpr_32, preferred-register: '' }
+ - { id: 763, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 764, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 765, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 766, class: vgpr_32, preferred-register: '' }
+ - { id: 767, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 768, class: vgpr_32, preferred-register: '' }
+ - { id: 769, class: vgpr_32, preferred-register: '' }
+ - { id: 770, class: vgpr_32, preferred-register: '' }
+ - { id: 771, class: vgpr_32, preferred-register: '' }
+ - { id: 772, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 773, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 774, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 775, class: vgpr_32, preferred-register: '' }
+ - { id: 776, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 777, class: vgpr_32, preferred-register: '' }
+ - { id: 778, class: vgpr_32, preferred-register: '' }
+ - { id: 779, class: vgpr_32, preferred-register: '' }
+ - { id: 780, class: vgpr_32, preferred-register: '' }
+ - { id: 781, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 782, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 783, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 784, class: vgpr_32, preferred-register: '' }
+ - { id: 785, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 786, class: vgpr_32, preferred-register: '' }
+ - { id: 787, class: vgpr_32, preferred-register: '' }
+ - { id: 788, class: vgpr_32, preferred-register: '' }
+ - { id: 789, class: vgpr_32, preferred-register: '' }
+ - { id: 790, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 791, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 792, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 793, class: vgpr_32, preferred-register: '' }
+ - { id: 794, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 795, class: vgpr_32, preferred-register: '' }
+ - { id: 796, class: vgpr_32, preferred-register: '' }
+ - { id: 797, class: vgpr_32, preferred-register: '' }
+ - { id: 798, class: vgpr_32, preferred-register: '' }
+ - { id: 799, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 800, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 801, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 802, class: vgpr_32, preferred-register: '' }
+ - { id: 803, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 804, class: vgpr_32, preferred-register: '' }
+ - { id: 805, class: vgpr_32, preferred-register: '' }
+ - { id: 806, class: vgpr_32, preferred-register: '' }
+ - { id: 807, class: vgpr_32, preferred-register: '' }
+ - { id: 808, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 809, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 810, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 811, class: vgpr_32, preferred-register: '' }
+ - { id: 812, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 813, class: vgpr_32, preferred-register: '' }
+ - { id: 814, class: vgpr_32, preferred-register: '' }
+ - { id: 815, class: vgpr_32, preferred-register: '' }
+ - { id: 816, class: vgpr_32, preferred-register: '' }
+ - { id: 817, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 818, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 819, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 820, class: vgpr_32, preferred-register: '' }
+ - { id: 821, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 822, class: vgpr_32, preferred-register: '' }
+ - { id: 823, class: vgpr_32, preferred-register: '' }
+ - { id: 824, class: vgpr_32, preferred-register: '' }
+ - { id: 825, class: vgpr_32, preferred-register: '' }
+ - { id: 826, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 827, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 828, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 829, class: vgpr_32, preferred-register: '' }
+ - { id: 830, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 831, class: vgpr_32, preferred-register: '' }
+ - { id: 832, class: vgpr_32, preferred-register: '' }
+ - { id: 833, class: vgpr_32, preferred-register: '' }
+ - { id: 834, class: vgpr_32, preferred-register: '' }
+ - { id: 835, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 836, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 837, class: vgpr_32, preferred-register: '' }
+ - { id: 838, class: sreg_32_xm0, preferred-register: '' }
+ - { id: 839, class: vgpr_32, preferred-register: '' }
+ - { id: 840, class: sreg_32_xm0, preferred-register: '' }
+ - { id: 841, class: vgpr_32, preferred-register: '' }
+ - { id: 842, class: vgpr_32, preferred-register: '' }
+ - { id: 843, class: vgpr_32, preferred-register: '' }
+ - { id: 844, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 845, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 846, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 847, class: vgpr_32, preferred-register: '' }
+ - { id: 848, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 849, class: vgpr_32, preferred-register: '' }
+ - { id: 850, class: vgpr_32, preferred-register: '' }
+ - { id: 851, class: vgpr_32, preferred-register: '' }
+ - { id: 852, class: vgpr_32, preferred-register: '' }
+ - { id: 853, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 854, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 855, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 856, class: vgpr_32, preferred-register: '' }
+ - { id: 857, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 858, class: vgpr_32, preferred-register: '' }
+ - { id: 859, class: vgpr_32, preferred-register: '' }
+ - { id: 860, class: vgpr_32, preferred-register: '' }
+ - { id: 861, class: vgpr_32, preferred-register: '' }
+ - { id: 862, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 863, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 864, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 865, class: vgpr_32, preferred-register: '' }
+ - { id: 866, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 867, class: vgpr_32, preferred-register: '' }
+ - { id: 868, class: vgpr_32, preferred-register: '' }
+ - { id: 869, class: vgpr_32, preferred-register: '' }
+ - { id: 870, class: vgpr_32, preferred-register: '' }
+ - { id: 871, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 872, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 873, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 874, class: vgpr_32, preferred-register: '' }
+ - { id: 875, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 876, class: vgpr_32, preferred-register: '' }
+ - { id: 877, class: vgpr_32, preferred-register: '' }
+ - { id: 878, class: vgpr_32, preferred-register: '' }
+ - { id: 879, class: vgpr_32, preferred-register: '' }
+ - { id: 880, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 881, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 882, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 883, class: vgpr_32, preferred-register: '' }
+ - { id: 884, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 885, class: vgpr_32, preferred-register: '' }
+ - { id: 886, class: vgpr_32, preferred-register: '' }
+ - { id: 887, class: vgpr_32, preferred-register: '' }
+ - { id: 888, class: vgpr_32, preferred-register: '' }
+ - { id: 889, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 890, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 891, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 892, class: vgpr_32, preferred-register: '' }
+ - { id: 893, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 894, class: vgpr_32, preferred-register: '' }
+ - { id: 895, class: vgpr_32, preferred-register: '' }
+ - { id: 896, class: vgpr_32, preferred-register: '' }
+ - { id: 897, class: vgpr_32, preferred-register: '' }
+ - { id: 898, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 899, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 900, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 901, class: vgpr_32, preferred-register: '' }
+ - { id: 902, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 903, class: vgpr_32, preferred-register: '' }
+ - { id: 904, class: vgpr_32, preferred-register: '' }
+ - { id: 905, class: vgpr_32, preferred-register: '' }
+ - { id: 906, class: vgpr_32, preferred-register: '' }
+ - { id: 907, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 908, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 909, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 910, class: vgpr_32, preferred-register: '' }
+ - { id: 911, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 912, class: vgpr_32, preferred-register: '' }
+ - { id: 913, class: vgpr_32, preferred-register: '' }
+ - { id: 914, class: vgpr_32, preferred-register: '' }
+ - { id: 915, class: vgpr_32, preferred-register: '' }
+ - { id: 916, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 917, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 918, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 919, class: vgpr_32, preferred-register: '' }
+ - { id: 920, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 921, class: vgpr_32, preferred-register: '' }
+ - { id: 922, class: vgpr_32, preferred-register: '' }
+ - { id: 923, class: vgpr_32, preferred-register: '' }
+ - { id: 924, class: vgpr_32, preferred-register: '' }
+ - { id: 925, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 926, class: sreg_64_xexec, preferred-register: '' }
+ - { id: 927, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 928, class: vgpr_32, preferred-register: '' }
+ - { id: 929, class: sreg_32_xexec_hi_and_sreg_32_xm0, preferred-register: '' }
+ - { id: 930, class: vgpr_32, preferred-register: '' }
+ - { id: 931, class: vgpr_32, preferred-register: '' }
+ - { id: 932, class: sreg_64, preferred-register: '' }
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%176' }
+ - { reg: '$sgpr4_sgpr5', virtual-reg: '%178' }
+ - { reg: '$sgpr6', virtual-reg: '%179' }
+ - { reg: '$sgpr7', virtual-reg: '%180' }
+ - { reg: '$sgpr8', virtual-reg: '%181' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 136
+ maxKernArgAlign: 8
+ ldsSize: 16384
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: true
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: true
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ workGroupIDX: { reg: '$sgpr6' }
+ workGroupIDY: { reg: '$sgpr7' }
+ workGroupIDZ: { reg: '$sgpr8' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr9' }
+ workItemIDX: { reg: '$vgpr0', mask: 1023 }
+ workItemIDY: { reg: '$vgpr0', mask: 1047552 }
+ psInputAddr: 0
+ psInputEnable: 0
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+ longBranchReservedReg: ''
+body: |
+ ; CHECK-LABEL: name: _ZL20rocblas_gemvn_kernelILi64ELi16EiffffEviiT3_lPKT4_lT1_lS3_lilS0_lPT5_lil
+ ; CHECK: bb.0 (%ir-block.18):
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.54(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr7
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr6
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 136
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0(p4)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1(p4)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], [[COPY7]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY6]], [[COPY8]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY3]](p4), 136, 0 :: (invariant load (s64) from %ir.20, align 4, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY3]](p4), 144, 0 :: (invariant load (s32) from %ir.20 + 8, addrspace 4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
+ ; CHECK-NEXT: S_CMP_LT_U32 [[COPY2]], killed [[COPY10]], implicit-def $scc
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 18
+ ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 12
+ ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_3]], killed [[S_MOV_B32_2]], implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_CSELECT_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub0
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE2]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY11]], [[COPY13]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY12]], [[COPY14]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_1]], %subreg.sub0, [[S_ADDC_U32_1]], %subreg.sub1
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed [[REG_SEQUENCE3]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (invariant load (s16) from %ir.27, !tbaa !10, addrspace 4)
+ ; CHECK-NEXT: S_CMP_LT_U32 [[COPY1]], killed [[COPY9]], implicit-def $scc
+ ; CHECK-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 20
+ ; CHECK-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 14
+ ; CHECK-NEXT: [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_5]], killed [[S_MOV_B32_4]], implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_CSELECT_B32_1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE4]].sub0
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE4]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY15]], [[COPY17]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY16]], [[COPY18]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_2]], %subreg.sub0, [[S_ADDC_U32_2]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed [[REG_SEQUENCE5]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (invariant load (s16) from %ir.33, !tbaa !10, addrspace 4)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_USHORT_SADDR1]], [[GLOBAL_LOAD_USHORT_SADDR]], implicit $exec
+ ; CHECK-NEXT: S_CMP_LT_U32 [[COPY]], killed [[S_LOAD_DWORD_IMM]], implicit-def $scc
+ ; CHECK-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 22
+ ; CHECK-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; CHECK-NEXT: [[S_CSELECT_B32_2:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_7]], killed [[S_MOV_B32_6]], implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_CSELECT_B32_2]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY19]], [[COPY21]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY20]], [[COPY22]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_3]], %subreg.sub0, [[S_ADDC_U32_3]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR2:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed [[REG_SEQUENCE7]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (invariant load (s16) from %ir.40, !tbaa !10, addrspace 4)
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[V_MUL_LO_U32_e64_]], [[GLOBAL_LOAD_USHORT_SADDR2]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
+ ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 killed [[V_MUL_LO_U32_e64_1]], killed [[S_MOV_B32_8]], implicit $exec
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.54, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.44):
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.53(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY3]](p4), 0, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset65, addrspace 4)
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:sgpr_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sgpr_32 = S_LOAD_DWORD_IMM [[COPY3]](p4), 88, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset55, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_9]]
+ ; CHECK-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64 = contract nofpexcept V_CMP_EQ_F32_e64 0, [[COPY23]], 0, [[COPY24]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_10]]
+ ; CHECK-NEXT: [[V_CMP_EQ_F32_e64_1:%[0-9]+]]:sreg_64 = contract nofpexcept V_CMP_EQ_F32_e64 0, [[S_LOAD_DWORD_IMM1]], 0, [[COPY25]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 killed [[V_CMP_EQ_F32_e64_]], killed [[V_CMP_EQ_F32_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, killed [[S_AND_B64_]], implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = COPY [[S_AND_B64_1]]
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.53, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2 (%ir-block.49):
+ ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.55(0x30000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+ ; CHECK-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_96 = REG_SEQUENCE [[COPY27]], %subreg.sub0, [[COPY26]], %subreg.sub1, [[COPY23]], %subreg.sub2
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE8]].sub0
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY3]](p4), 104, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset57, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY3]](p4), 120, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset61, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY3]](p4), 128, 0 :: (dereferenceable invariant load (s64) from %ir..kernarg.offset63, align 16, addrspace 4)
+ ; CHECK-NEXT: [[COPY29:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM1]].sub1
+ ; CHECK-NEXT: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM1]].sub0
+ ; CHECK-NEXT: [[REG_SEQUENCE9:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY30]], %subreg.sub0, killed [[COPY29]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM1]].sub3
+ ; CHECK-NEXT: [[COPY32:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM1]].sub2
+ ; CHECK-NEXT: [[REG_SEQUENCE10:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY32]], %subreg.sub0, killed [[COPY31]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY33:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1
+ ; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[COPY1]], 31, implicit-def dead $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE11:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_ASHR_I32_]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY34:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE11]].sub1
+ ; CHECK-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY1]], killed [[COPY33]]
+ ; CHECK-NEXT: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
+ ; CHECK-NEXT: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY1]], [[COPY35]]
+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_MUL_HI_U32_]], killed [[S_MUL_I32_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MUL_I32_1:%[0-9]+]]:sreg_32 = S_MUL_I32 killed [[COPY34]], [[COPY35]]
+ ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_ADD_I32_]], killed [[S_MUL_I32_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MUL_I32_2:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY1]], [[COPY35]]
+ ; CHECK-NEXT: [[REG_SEQUENCE12:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_2]], %subreg.sub0, killed [[S_ADD_I32_1]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 killed [[REG_SEQUENCE12]], [[S_MOV_B32_11]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY36:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE9]].sub0
+ ; CHECK-NEXT: [[COPY37:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE9]].sub1
+ ; CHECK-NEXT: [[COPY38:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_]].sub0
+ ; CHECK-NEXT: [[COPY39:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY36]], [[COPY38]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY37]], [[COPY39]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE13:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_4]], %subreg.sub0, [[S_ADDC_U32_4]], %subreg.sub1
+ ; CHECK-NEXT: [[S_LSHL_B64_1:%[0-9]+]]:sreg_64 = S_LSHL_B64 killed [[REG_SEQUENCE10]], [[S_MOV_B32_11]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE13]].sub0
+ ; CHECK-NEXT: [[COPY41:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE13]].sub1
+ ; CHECK-NEXT: [[COPY42:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_1]].sub0
+ ; CHECK-NEXT: [[COPY43:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_1]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY40]], [[COPY42]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY41]], [[COPY43]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE14:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_5]], %subreg.sub0, [[S_ADDC_U32_5]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 1023
+ ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY4]](s32), killed [[S_MOV_B32_12]], implicit $exec
+ ; CHECK-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY4]](s32), 10, 10, implicit $exec
+ ; CHECK-NEXT: [[V_MAD_U32_U24_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_MAD_U32_U24_e64 [[V_BFE_U32_e64_]], [[GLOBAL_LOAD_USHORT_SADDR]], [[V_AND_B32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY44:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_13]]
+ ; CHECK-NEXT: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64 = nofpexcept V_CMP_NEQ_F32_e64 0, [[COPY23]], 0, [[COPY44]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, killed [[V_CMP_NEQ_F32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = COPY [[S_AND_B64_2]]
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.3, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.55:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+ ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[DEF1]]
+ ; CHECK-NEXT: [[COPY46:%[0-9]+]]:vreg_64_align2 = COPY [[DEF]]
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3 (%ir-block.61):
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 256
+ ; CHECK-NEXT: [[V_CMP_LT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 [[V_MAD_U32_U24_e64_]], killed [[S_MOV_B32_14]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[DEF3]]
+ ; CHECK-NEXT: [[COPY48:%[0-9]+]]:vreg_64_align2 = COPY [[DEF2]]
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_U32_e64_]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.Flow90:
+ ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.23(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_1]], %bb.55, %23, %bb.6
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY45]], %bb.55, %21, %bb.6
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vreg_64_align2 = PHI [[COPY46]], %bb.55, %22, %bb.6
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.55, [[S_MOV_B64_2]], %bb.6
+ ; CHECK-NEXT: [[S_AND_B64_3:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[PHI3]], implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = COPY [[S_AND_B64_3]]
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.23
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5 (%ir-block.70):
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; CHECK-NEXT: [[COPY49:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_15]]
+ ; CHECK-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_LSHL_ADD_U32_e64 [[COPY2]], [[COPY49]], [[V_MAD_U32_U24_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_LSHL_ADD_U32_e64_]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY50:%[0-9]+]]:vgpr_32 = COPY [[DEF5]]
+ ; CHECK-NEXT: [[COPY51:%[0-9]+]]:vreg_64_align2 = COPY [[DEF4]]
+ ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.Flow91:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_2]], %bb.3, %669, %bb.9
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[COPY47]], %bb.3, %26, %bb.9
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:vreg_64_align2 = PHI [[COPY48]], %bb.3, %27, %bb.9
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7 (%ir-block.80):
+ ; CHECK-NEXT: successors: %bb.8(0x50000000), %bb.48(0x30000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_16]]
+ ; CHECK-NEXT: [[V_CMP_EQ_F32_e64_2:%[0-9]+]]:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, [[S_LOAD_DWORD_IMM1]], 0, [[COPY52]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_LSHL_ADD_U32_e64_]], [[S_LOAD_DWORD_IMM2]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_MUL_LO_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE15:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MUL_LO_U32_e64_2]], %subreg.sub0, [[V_ASHRREV_I32_e64_]], %subreg.sub1
+ ; CHECK-NEXT: [[S_AND_B64_4:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, killed [[V_CMP_EQ_F32_e64_2]], implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = COPY [[S_AND_B64_4]]
+ ; CHECK-NEXT: [[COPY53:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_16]], implicit $exec
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.48, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8 (%ir-block.84):
+ ; CHECK-NEXT: successors: %bb.48(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 killed [[S_MOV_B32_17]], [[REG_SEQUENCE15]], implicit $exec
+ ; CHECK-NEXT: [[COPY54:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE14]].sub0
+ ; CHECK-NEXT: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub0
+ ; CHECK-NEXT: [[COPY56:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE14]].sub1
+ ; CHECK-NEXT: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY54]], [[COPY55]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY58:%[0-9]+]]:vgpr_32 = COPY [[COPY56]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY58]], [[COPY57]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE16:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE16]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.85, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_MUL_F32_e64 0, killed [[GLOBAL_LOAD_DWORD]], 0, [[S_LOAD_DWORD_IMM1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.48
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9.Flow92:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_3]], %bb.5, %672, %bb.48
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY50]], %bb.5, %171, %bb.48
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:vreg_64_align2 = PHI [[COPY51]], %bb.5, %24, %bb.48
+ ; CHECK-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[S_AND_B64_5:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI7]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[COPY59:%[0-9]+]]:sreg_64 = COPY [[S_AND_B64_5]]
+ ; CHECK-NEXT: S_BRANCH %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.10 (%ir-block.91):
+ ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY60:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE11]]
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY3]](p4), 24, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset39, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY3]](p4), 40, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset43, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY3]](p4), 48, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset45, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY3]](p4), 64, 0 :: (dereferenceable invariant load (s64) from %ir..kernarg.offset45 + 16, align 16, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY3]](p4), 72, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset51, align 8, addrspace 4)
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY3]](p4), 80, 0 :: (dereferenceable invariant load (s64) from %ir..kernarg.offset53, align 16, addrspace 4)
+ ; CHECK-NEXT: [[COPY61:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM2]].sub1
+ ; CHECK-NEXT: [[COPY62:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM2]].sub0
+ ; CHECK-NEXT: [[REG_SEQUENCE17:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY62]], %subreg.sub0, killed [[COPY61]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY63:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM2]].sub3
+ ; CHECK-NEXT: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM2]].sub2
+ ; CHECK-NEXT: [[REG_SEQUENCE18:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY64]], %subreg.sub0, killed [[COPY63]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY65:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM3]].sub0
+ ; CHECK-NEXT: [[COPY66:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM3]].sub1
+ ; CHECK-NEXT: [[COPY67:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM2]].sub1
+ ; CHECK-NEXT: [[COPY68:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0
+ ; CHECK-NEXT: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM3]].sub3
+ ; CHECK-NEXT: [[COPY70:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM3]].sub2
+ ; CHECK-NEXT: [[REG_SEQUENCE19:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY70]], %subreg.sub0, killed [[COPY69]], %subreg.sub1
+ ; CHECK-NEXT: [[REG_SEQUENCE20:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY68]], %subreg.sub0, killed [[COPY67]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY71:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM3]].sub1
+ ; CHECK-NEXT: [[COPY72:%[0-9]+]]:sreg_32 = COPY [[COPY60]].sub0
+ ; CHECK-NEXT: [[S_MUL_I32_3:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY72]], killed [[COPY66]]
+ ; CHECK-NEXT: [[S_MUL_HI_U32_1:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY72]], [[COPY65]]
+ ; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_MUL_HI_U32_1]], killed [[S_MUL_I32_3]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY73:%[0-9]+]]:sreg_32 = COPY [[COPY60]].sub1
+ ; CHECK-NEXT: [[S_MUL_I32_4:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY73]], [[COPY65]]
+ ; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_ADD_I32_2]], killed [[S_MUL_I32_4]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MUL_I32_5:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY72]], [[COPY65]]
+ ; CHECK-NEXT: [[REG_SEQUENCE21:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_5]], %subreg.sub0, killed [[S_ADD_I32_3]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[S_LSHL_B64_2:%[0-9]+]]:sreg_64 = S_LSHL_B64 killed [[REG_SEQUENCE21]], [[S_MOV_B32_18]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY74:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE17]].sub0
+ ; CHECK-NEXT: [[COPY75:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE17]].sub1
+ ; CHECK-NEXT: [[COPY76:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_2]].sub0
+ ; CHECK-NEXT: [[COPY77:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_2]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY74]], [[COPY76]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_6:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY75]], [[COPY77]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE22:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_6]], %subreg.sub0, [[S_ADDC_U32_6]], %subreg.sub1
+ ; CHECK-NEXT: [[S_LSHL_B64_3:%[0-9]+]]:sreg_64 = S_LSHL_B64 killed [[REG_SEQUENCE18]], [[S_MOV_B32_18]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY78:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE22]].sub0
+ ; CHECK-NEXT: [[COPY79:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE22]].sub1
+ ; CHECK-NEXT: [[COPY80:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_3]].sub0
+ ; CHECK-NEXT: [[COPY81:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_3]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY78]], [[COPY80]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_7:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY79]], [[COPY81]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE23:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_7]], %subreg.sub0, [[S_ADDC_U32_7]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MUL_I32_6:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY72]], killed [[COPY71]]
+ ; CHECK-NEXT: [[COPY82:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM3]].sub0
+ ; CHECK-NEXT: [[S_MUL_HI_U32_2:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY72]], [[COPY82]]
+ ; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_MUL_HI_U32_2]], killed [[S_MUL_I32_6]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MUL_I32_7:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY73]], [[COPY82]]
+ ; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_ADD_I32_4]], killed [[S_MUL_I32_7]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MUL_I32_8:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY72]], [[COPY82]]
+ ; CHECK-NEXT: [[REG_SEQUENCE24:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_8]], %subreg.sub0, killed [[S_ADD_I32_5]], %subreg.sub1
+ ; CHECK-NEXT: [[S_LSHL_B64_4:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE24]], [[S_MOV_B32_18]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY83:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE19]].sub0
+ ; CHECK-NEXT: [[COPY84:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE19]].sub1
+ ; CHECK-NEXT: [[COPY85:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_4]].sub0
+ ; CHECK-NEXT: [[COPY86:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_4]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY83]], [[COPY85]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_8:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY84]], [[COPY86]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE25:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_8]], %subreg.sub0, [[S_ADDC_U32_8]], %subreg.sub1
+ ; CHECK-NEXT: [[S_LSHL_B64_5:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[REG_SEQUENCE20]], [[S_MOV_B32_18]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY87:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE25]].sub0
+ ; CHECK-NEXT: [[COPY88:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE25]].sub1
+ ; CHECK-NEXT: [[COPY89:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_5]].sub0
+ ; CHECK-NEXT: [[COPY90:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_5]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY87]], [[COPY89]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_9:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY88]], [[COPY90]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE26:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_9]], %subreg.sub0, [[S_ADDC_U32_9]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY91:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE8]].sub1
+ ; CHECK-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], killed [[S_MOV_B32_19]], implicit-def dead $scc
+ ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_LSHL_B32_]], [[V_AND_B32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 31
+ ; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY91]], killed [[S_MOV_B32_20]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 26
+ ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 killed [[S_ASHR_I32_1]], killed [[S_MOV_B32_21]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY91]], killed [[S_LSHR_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 -64
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ADD_I32_6]], killed [[S_MOV_B32_22]], implicit-def dead $scc
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 [[S_MOV_B32_18]], [[V_BFE_U32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_LSHLREV_B32_e64_]], [[S_AND_B32_]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY92:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_23]], implicit $exec
+ ; CHECK-NEXT: [[COPY93:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_23]], implicit $exec
+ ; CHECK-NEXT: [[COPY94:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_23]], implicit $exec
+ ; CHECK-NEXT: [[COPY95:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_23]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF2:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_1]], %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.11..lr.ph.i:
+ ; CHECK-NEXT: successors: %bb.13(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY96:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE19]]
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_2:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_ADD_U32_e64_]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_3:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_ADD_U32_e64_]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[COPY97:%[0-9]+]]:sreg_64 = COPY [[V_CMP_LT_I32_e64_3]]
+ ; CHECK-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sreg_32 = S_MOV_B32 64
+ ; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], [[S_MOV_B32_24]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_4:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_1]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[COPY98:%[0-9]+]]:sreg_64 = COPY [[V_CMP_LT_I32_e64_4]]
+ ; CHECK-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sreg_32 = S_MOV_B32 128
+ ; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_25]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_5:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_2]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[COPY99:%[0-9]+]]:sreg_64 = COPY [[V_CMP_LT_I32_e64_5]]
+ ; CHECK-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sreg_32 = S_MOV_B32 192
+ ; CHECK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_26]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_6:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_3]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[COPY100:%[0-9]+]]:sreg_64 = COPY [[V_CMP_LT_I32_e64_6]]
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[S_LOAD_DWORD_IMM3]], [[V_LSHLREV_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[V_MUL_LO_U32_e64_3]], [[S_LOAD_DWORD_IMM3]], 0, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sreg_32 = S_MOV_B32 6
+ ; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[S_LOAD_DWORD_IMM3]], [[S_MOV_B32_27]], implicit-def dead $scc
+ ; CHECK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = nuw nsw V_ADD_U32_e64 [[V_LSHLREV_B32_e64_]], [[S_MOV_B32_18]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_4:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[S_LOAD_DWORD_IMM3]], [[V_ADD_U32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sreg_32 = S_MOV_B32 3
+ ; CHECK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = nuw nsw V_ADD_U32_e64 [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_28]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_5:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[S_LOAD_DWORD_IMM3]], [[V_ADD_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_6:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[V_BFE_U32_e64_]], [[S_LOAD_DWORD_IMM3]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[S_MOV_B32_18]], killed [[V_MUL_LO_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_7:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[S_LOAD_DWORD_IMM4]], [[V_LSHLREV_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[V_MUL_LO_U32_e64_7]], [[S_LOAD_DWORD_IMM4]], 0, implicit $exec
+ ; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[S_LOAD_DWORD_IMM4]], [[S_MOV_B32_27]], implicit-def dead $scc
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_8:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[S_LOAD_DWORD_IMM4]], [[V_ADD_U32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_9:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[S_LOAD_DWORD_IMM4]], [[V_ADD_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[COPY101:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_4]].sub0
+ ; CHECK-NEXT: [[COPY102:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_4]].sub1
+ ; CHECK-NEXT: [[COPY103:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_5]].sub0
+ ; CHECK-NEXT: [[COPY104:%[0-9]+]]:sreg_32 = COPY [[S_LSHL_B64_5]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY101]], [[COPY103]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_10:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY102]], [[COPY104]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE27:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_10]], %subreg.sub0, [[S_ADDC_U32_10]], %subreg.sub1
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_10:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[V_BFE_U32_e64_]], [[S_LOAD_DWORD_IMM4]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[S_MOV_B32_18]], killed [[V_MUL_LO_U32_e64_10]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_LSHLREV_B32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE28:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_LSHLREV_B32_e64_2]], %subreg.sub0, [[V_ASHRREV_I32_e64_1]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = nsw V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE28]], implicit $exec
+ ; CHECK-NEXT: [[COPY105:%[0-9]+]]:sreg_32 = COPY [[COPY96]].sub0
+ ; CHECK-NEXT: [[COPY106:%[0-9]+]]:sreg_32 = COPY [[COPY96]].sub1
+ ; CHECK-NEXT: [[COPY107:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE27]].sub0
+ ; CHECK-NEXT: [[COPY108:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE27]].sub1
+ ; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY105]], [[COPY107]], implicit-def $scc
+ ; CHECK-NEXT: [[S_ADDC_U32_11:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY106]], [[COPY108]], implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE29:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_ADD_U32_11]], %subreg.sub0, [[S_ADDC_U32_11]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY109:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE29]].sub0
+ ; CHECK-NEXT: [[COPY110:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_1]].sub0
+ ; CHECK-NEXT: [[COPY111:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE29]].sub1
+ ; CHECK-NEXT: [[COPY112:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_1]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY109]], [[COPY110]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY113:%[0-9]+]]:vgpr_32 = COPY [[COPY111]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY113]], [[COPY112]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE30:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, [[V_ADDC_U32_e64_2]], %subreg.sub1
+ ; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
+ ; CHECK-NEXT: [[REG_SEQUENCE31:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_LSHL_B32_2]], %subreg.sub0, [[S_ASHR_I32_2]], %subreg.sub1
+ ; CHECK-NEXT: [[S_LSHL_B64_6:%[0-9]+]]:sreg_64 = nsw S_LSHL_B64 killed [[REG_SEQUENCE31]], [[S_MOV_B32_18]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[S_MOV_B64_4:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: [[COPY114:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_29]], implicit $exec
+ ; CHECK-NEXT: [[COPY115:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_29]], implicit $exec
+ ; CHECK-NEXT: [[COPY116:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_29]], implicit $exec
+ ; CHECK-NEXT: [[COPY117:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_29]], implicit $exec
+ ; CHECK-NEXT: [[COPY118:%[0-9]+]]:sreg_64 = COPY [[COPY97]]
+ ; CHECK-NEXT: [[COPY119:%[0-9]+]]:sreg_64 = COPY [[COPY98]]
+ ; CHECK-NEXT: [[COPY120:%[0-9]+]]:sreg_64 = COPY [[COPY99]]
+ ; CHECK-NEXT: [[COPY121:%[0-9]+]]:sreg_64 = COPY [[COPY100]]
+ ; CHECK-NEXT: S_BRANCH %bb.13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.12.Flow89:
+ ; CHECK-NEXT: successors: %bb.24(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:vgpr_32 = PHI [[V_LSHLREV_B32_e64_]], %bb.10, %98, %bb.22
+ ; CHECK-NEXT: [[PHI11:%[0-9]+]]:vgpr_32 = PHI [[COPY92]], %bb.10, %106, %bb.22
+ ; CHECK-NEXT: [[PHI12:%[0-9]+]]:vgpr_32 = PHI [[COPY93]], %bb.10, %105, %bb.22
+ ; CHECK-NEXT: [[PHI13:%[0-9]+]]:vgpr_32 = PHI [[COPY94]], %bb.10, %104, %bb.22
+ ; CHECK-NEXT: [[PHI14:%[0-9]+]]:vgpr_32 = PHI [[COPY95]], %bb.10, %103, %bb.22
+ ; CHECK-NEXT: SI_END_CF [[SI_IF2]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY91]], [[S_AND_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: S_BRANCH %bb.24
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.13 (%ir-block.146):
+ ; CHECK-NEXT: successors: %bb.14(0x40000000), %bb.21(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI15:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_4]], %bb.11, %102, %bb.21
+ ; CHECK-NEXT: [[PHI16:%[0-9]+]]:vreg_64_align2 = PHI [[REG_SEQUENCE30]], %bb.11, %101, %bb.21
+ ; CHECK-NEXT: [[PHI17:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_29]], %bb.11, %100, %bb.21
+ ; CHECK-NEXT: [[PHI18:%[0-9]+]]:vgpr_32 = PHI [[V_ADD_U32_e64_]], %bb.11, %99, %bb.21
+ ; CHECK-NEXT: [[PHI19:%[0-9]+]]:vgpr_32 = PHI [[V_LSHLREV_B32_e64_]], %bb.11, %98, %bb.21
+ ; CHECK-NEXT: [[PHI20:%[0-9]+]]:vgpr_32 = PHI [[COPY114]], %bb.11, %97, %bb.21
+ ; CHECK-NEXT: [[PHI21:%[0-9]+]]:vgpr_32 = PHI [[COPY115]], %bb.11, %96, %bb.21
+ ; CHECK-NEXT: [[PHI22:%[0-9]+]]:vgpr_32 = PHI [[COPY116]], %bb.11, %95, %bb.21
+ ; CHECK-NEXT: [[PHI23:%[0-9]+]]:vgpr_32 = PHI [[COPY117]], %bb.11, %94, %bb.21
+ ; CHECK-NEXT: [[SI_IF3:%[0-9]+]]:sreg_64 = SI_IF [[COPY118]], %bb.21, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.14 (%ir-block.150):
+ ; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.20(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI16]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.lsr.iv33, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_7]], [[PHI17]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_8]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE32:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_8]], %subreg.sub0, [[V_ASHRREV_I32_e64_2]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE32]], implicit $exec
+ ; CHECK-NEXT: [[COPY122:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY123:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_2]].sub0
+ ; CHECK-NEXT: [[COPY124:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY125:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_2]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY122]], [[COPY123]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY126:%[0-9]+]]:vgpr_32 = COPY [[COPY124]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY126]], [[COPY125]], killed [[V_ADD_CO_U32_e64_5]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE33:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[V_ADDC_U32_e64_4]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD2:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE33]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.154, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MUL_LO_U32_e64_8]], [[PHI17]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_3:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_9]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE34:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_9]], %subreg.sub0, [[V_ASHRREV_I32_e64_3]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE34]], implicit $exec
+ ; CHECK-NEXT: [[COPY127:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY128:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_3]].sub0
+ ; CHECK-NEXT: [[COPY129:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY130:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_3]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_6:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY127]], [[COPY128]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY131:%[0-9]+]]:vgpr_32 = COPY [[COPY129]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_6:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY131]], [[COPY130]], killed [[V_ADD_CO_U32_e64_7]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE35:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_6]], %subreg.sub0, [[V_ADDC_U32_e64_6]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD3:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE35]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.158, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MUL_LO_U32_e64_9]], [[PHI17]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_4:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_10]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE36:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_10]], %subreg.sub0, [[V_ASHRREV_I32_e64_4]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_4:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE36]], implicit $exec
+ ; CHECK-NEXT: [[COPY132:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY133:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_4]].sub0
+ ; CHECK-NEXT: [[COPY134:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY135:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_4]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_8:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_9:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY132]], [[COPY133]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY136:%[0-9]+]]:vgpr_32 = COPY [[COPY134]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_8:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_9:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY136]], [[COPY135]], killed [[V_ADD_CO_U32_e64_9]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE37:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_8]], %subreg.sub0, [[V_ADDC_U32_e64_8]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD4:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE37]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.162, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHLREV_B32_e64_1]], [[PHI18]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_5:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_11]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE38:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_11]], %subreg.sub0, [[V_ASHRREV_I32_e64_5]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_5:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE38]], implicit $exec
+ ; CHECK-NEXT: [[COPY137:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY138:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_5]].sub0
+ ; CHECK-NEXT: [[COPY139:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY140:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_5]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_10:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_11:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY137]], [[COPY138]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY141:%[0-9]+]]:vgpr_32 = COPY [[COPY139]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_10:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_11:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY141]], [[COPY140]], killed [[V_ADD_CO_U32_e64_11]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE39:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_10]], %subreg.sub0, [[V_ADDC_U32_e64_10]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.166, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD5]], 0, [[PHI20]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_4]], [[PHI18]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_6:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_12]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE40:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_12]], %subreg.sub0, [[V_ASHRREV_I32_e64_6]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_6:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE40]], implicit $exec
+ ; CHECK-NEXT: [[COPY142:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY143:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_6]].sub0
+ ; CHECK-NEXT: [[COPY144:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY145:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_6]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_12:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_13:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY142]], [[COPY143]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY146:%[0-9]+]]:vgpr_32 = COPY [[COPY144]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_12:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_13:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY146]], [[COPY145]], killed [[V_ADD_CO_U32_e64_13]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE41:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_12]], %subreg.sub0, [[V_ADDC_U32_e64_12]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD6:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.172, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD6]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MUL_LO_U32_e64_4]], [[PHI18]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_7:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_13]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE42:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_13]], %subreg.sub0, [[V_ASHRREV_I32_e64_7]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_7:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE42]], implicit $exec
+ ; CHECK-NEXT: [[COPY147:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY148:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_7]].sub0
+ ; CHECK-NEXT: [[COPY149:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY150:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_7]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_14:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_15:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY147]], [[COPY148]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY151:%[0-9]+]]:vgpr_32 = COPY [[COPY149]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_14:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_15:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY151]], [[COPY150]], killed [[V_ADD_CO_U32_e64_15]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE43:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_14]], %subreg.sub0, [[V_ADDC_U32_e64_14]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD7:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.178, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD7]], 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MUL_LO_U32_e64_5]], [[PHI18]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_8:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_14]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE44:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_14]], %subreg.sub0, [[V_ASHRREV_I32_e64_8]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_8:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_18]], killed [[REG_SEQUENCE44]], implicit $exec
+ ; CHECK-NEXT: [[COPY152:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY153:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_8]].sub0
+ ; CHECK-NEXT: [[COPY154:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY155:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_8]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_16:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_17:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY152]], [[COPY153]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY156:%[0-9]+]]:vgpr_32 = COPY [[COPY154]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_16:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_17:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY156]], [[COPY155]], killed [[V_ADD_CO_U32_e64_17]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE45:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_16]], %subreg.sub0, [[V_ADDC_U32_e64_16]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD8:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.184, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD8]], 0, [[V_FMAC_F32_e64_2]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_IF4:%[0-9]+]]:sreg_64 = SI_IF [[COPY119]], %bb.20, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.15
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.15 (%ir-block.191):
+ ; CHECK-NEXT: successors: %bb.16(0x40000000), %bb.19(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD9:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.192, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_4:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD9]], 0, [[PHI21]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD10:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.196, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_5:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD10]], 0, [[V_FMAC_F32_e64_4]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD11:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.200, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_6:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD11]], 0, [[V_FMAC_F32_e64_5]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD12:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.204, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_7:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD12]], 0, [[V_FMAC_F32_e64_6]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_IF5:%[0-9]+]]:sreg_64 = SI_IF [[COPY120]], %bb.19, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.16 (%ir-block.211):
+ ; CHECK-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD13:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.212, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_8:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD13]], 0, [[PHI22]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD14:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.216, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_9:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD14]], 0, [[V_FMAC_F32_e64_8]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD15:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.220, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_10:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD15]], 0, [[V_FMAC_F32_e64_9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD16:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.224, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_11:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD16]], 0, [[V_FMAC_F32_e64_10]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[SI_IF6:%[0-9]+]]:sreg_64 = SI_IF [[COPY121]], %bb.18, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.17
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.17 (%ir-block.231):
+ ; CHECK-NEXT: successors: %bb.18(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD17:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.232, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_12:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD17]], 0, [[PHI23]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD18:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.236, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_13:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD18]], 0, [[V_FMAC_F32_e64_12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.240, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_14:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD19]], 0, [[V_FMAC_F32_e64_13]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD20:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.244, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_15:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD20]], 0, [[V_FMAC_F32_e64_14]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.18.Flow85:
+ ; CHECK-NEXT: successors: %bb.19(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI24:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.16, [[V_FMAC_F32_e64_15]], %bb.17
+ ; CHECK-NEXT: SI_END_CF [[SI_IF6]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.19.Flow86:
+ ; CHECK-NEXT: successors: %bb.20(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI25:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.15, [[V_FMAC_F32_e64_11]], %bb.18
+ ; CHECK-NEXT: [[PHI26:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.15, [[PHI24]], %bb.18
+ ; CHECK-NEXT: SI_END_CF [[SI_IF5]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.20.Flow87:
+ ; CHECK-NEXT: successors: %bb.21(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI27:%[0-9]+]]:vgpr_32 = PHI [[PHI21]], %bb.14, [[V_FMAC_F32_e64_7]], %bb.19
+ ; CHECK-NEXT: [[PHI28:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.14, [[PHI25]], %bb.19
+ ; CHECK-NEXT: [[PHI29:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.14, [[PHI26]], %bb.19
+ ; CHECK-NEXT: SI_END_CF [[SI_IF4]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.21 (%ir-block.254):
+ ; CHECK-NEXT: successors: %bb.22(0x04000000), %bb.13(0x7c000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI30:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.13, [[PHI29]], %bb.20
+ ; CHECK-NEXT: [[PHI31:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.13, [[PHI28]], %bb.20
+ ; CHECK-NEXT: [[PHI32:%[0-9]+]]:vgpr_32 = PHI [[PHI21]], %bb.13, [[PHI27]], %bb.20
+ ; CHECK-NEXT: [[PHI33:%[0-9]+]]:vgpr_32 = PHI [[PHI20]], %bb.13, [[V_FMAC_F32_e64_3]], %bb.20
+ ; CHECK-NEXT: SI_END_CF [[SI_IF3]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = nuw nsw V_ADD_U32_e64 [[PHI19]], [[S_MOV_B32_24]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI18]], [[S_LSHL_B32_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[PHI17]], [[S_LSHL_B32_2]], implicit-def dead $scc
+ ; CHECK-NEXT: [[COPY157:%[0-9]+]]:vgpr_32 = COPY [[PHI16]].sub0
+ ; CHECK-NEXT: [[COPY158:%[0-9]+]]:sreg_32_xm0 = COPY [[S_LSHL_B64_6]].sub0
+ ; CHECK-NEXT: [[COPY159:%[0-9]+]]:vgpr_32 = COPY [[PHI16]].sub1
+ ; CHECK-NEXT: [[COPY160:%[0-9]+]]:sreg_32_xm0 = COPY [[S_LSHL_B64_6]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_18:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_19:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY157]], [[COPY158]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY161:%[0-9]+]]:vgpr_32 = COPY [[COPY160]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_18:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_19:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY159]], [[COPY161]], killed [[V_ADD_CO_U32_e64_19]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE46:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_18]], %subreg.sub0, [[V_ADDC_U32_e64_18]], %subreg.sub1
+ ; CHECK-NEXT: [[V_CMP_GE_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_I32_e64 [[V_ADD_U32_e64_15]], [[S_AND_B32_]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_GE_I32_e64_]], [[PHI15]], implicit-def dead $scc
+ ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.22
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.22.Flow88:
+ ; CHECK-NEXT: successors: %bb.12(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI34:%[0-9]+]]:vgpr_32 = PHI [[PHI30]], %bb.21
+ ; CHECK-NEXT: [[PHI35:%[0-9]+]]:vgpr_32 = PHI [[PHI31]], %bb.21
+ ; CHECK-NEXT: [[PHI36:%[0-9]+]]:vgpr_32 = PHI [[PHI32]], %bb.21
+ ; CHECK-NEXT: [[PHI37:%[0-9]+]]:vgpr_32 = PHI [[PHI33]], %bb.21
+ ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.23.Flow93:
+ ; CHECK-NEXT: successors: %bb.51(0x40000000), %bb.52(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI38:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.4, %165, %bb.45
+ ; CHECK-NEXT: [[PHI39:%[0-9]+]]:vreg_64_align2 = PHI [[PHI2]], %bb.4, %166, %bb.45
+ ; CHECK-NEXT: [[PHI40:%[0-9]+]]:sreg_64 = PHI [[PHI]], %bb.4, %167, %bb.45
+ ; CHECK-NEXT: [[SI_IF7:%[0-9]+]]:sreg_64 = SI_IF [[PHI40]], %bb.52, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.51
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.24.._crit_edge.i:
+ ; CHECK-NEXT: successors: %bb.25(0x50000000), %bb.33(0x30000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; CHECK-NEXT: S_CMP_LT_I32 [[S_SUB_I32_]], killed [[S_MOV_B32_30]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.33, implicit $scc
+ ; CHECK-NEXT: S_BRANCH %bb.25
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.25 (%ir-block.266):
+ ; CHECK-NEXT: successors: %bb.26(0x40000000), %bb.34(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_7:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[PHI10]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY162:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_31]], implicit $exec
+ ; CHECK-NEXT: [[COPY163:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_31]], implicit $exec
+ ; CHECK-NEXT: [[COPY164:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_31]], implicit $exec
+ ; CHECK-NEXT: [[COPY165:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_31]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF8:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_LT_I32_e64_7]], %bb.34, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.26
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.26 (%ir-block.271):
+ ; CHECK-NEXT: successors: %bb.27(0x40000000), %bb.32(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_11:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[PHI10]], [[S_LOAD_DWORD_IMM4]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_9:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_MUL_LO_U32_e64_11]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE47:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MUL_LO_U32_e64_11]], %subreg.sub0, [[V_ASHRREV_I32_e64_9]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_9:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_32]], killed [[REG_SEQUENCE47]], implicit $exec
+ ; CHECK-NEXT: [[COPY166:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY167:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_9]].sub0
+ ; CHECK-NEXT: [[COPY168:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY169:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_9]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_20:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_21:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY166]], [[COPY167]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY170:%[0-9]+]]:vgpr_32 = COPY [[COPY168]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_20:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_21:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY170]], [[COPY169]], killed [[V_ADD_CO_U32_e64_21]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE48:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_20]], %subreg.sub0, [[V_ADDC_U32_e64_20]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD21:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE48]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.274, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI10]], killed [[S_MOV_B32_33]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_8:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_OR_B32_e64_]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY171:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_34]], implicit $exec
+ ; CHECK-NEXT: [[COPY172:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_34]], implicit $exec
+ ; CHECK-NEXT: [[COPY173:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_34]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF9:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_8]], %bb.32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.27
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.27 (%ir-block.281):
+ ; CHECK-NEXT: successors: %bb.28(0x40000000), %bb.31(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_12:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_OR_B32_e64_]], [[S_LOAD_DWORD_IMM4]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_10:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_MUL_LO_U32_e64_12]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE49:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MUL_LO_U32_e64_12]], %subreg.sub0, [[V_ASHRREV_I32_e64_10]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_10:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_32]], killed [[REG_SEQUENCE49]], implicit $exec
+ ; CHECK-NEXT: [[COPY174:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY175:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_10]].sub0
+ ; CHECK-NEXT: [[COPY176:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY177:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_10]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_22:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_23:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY174]], [[COPY175]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY178:%[0-9]+]]:vgpr_32 = COPY [[COPY176]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_22:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_23:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY178]], [[COPY177]], killed [[V_ADD_CO_U32_e64_23]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE50:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_22]], %subreg.sub0, [[V_ADDC_U32_e64_22]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE50]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.284, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI10]], [[S_MOV_B32_32]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_9:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_OR_B32_e64_1]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY179:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_35]], implicit $exec
+ ; CHECK-NEXT: [[COPY180:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_35]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF10:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_9]], %bb.31, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.28
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.28 (%ir-block.291):
+ ; CHECK-NEXT: successors: %bb.29(0x40000000), %bb.30(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_13:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_OR_B32_e64_1]], [[S_LOAD_DWORD_IMM4]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_11:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_MUL_LO_U32_e64_13]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE51:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MUL_LO_U32_e64_13]], %subreg.sub0, [[V_ASHRREV_I32_e64_11]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_11:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_36]], killed [[REG_SEQUENCE51]], implicit $exec
+ ; CHECK-NEXT: [[COPY181:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY182:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_11]].sub0
+ ; CHECK-NEXT: [[COPY183:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY184:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_11]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_24:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_25:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY181]], [[COPY182]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY185:%[0-9]+]]:vgpr_32 = COPY [[COPY183]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_24:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_25:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY185]], [[COPY184]], killed [[V_ADD_CO_U32_e64_25]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE52:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_24]], %subreg.sub0, [[V_ADDC_U32_e64_24]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD23:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE52]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.294, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sreg_32 = S_MOV_B32 3
+ ; CHECK-NEXT: [[V_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI10]], killed [[S_MOV_B32_37]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_10:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_OR_B32_e64_2]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF11:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_10]], %bb.30, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.29
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.29 (%ir-block.301):
+ ; CHECK-NEXT: successors: %bb.30(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_14:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_OR_B32_e64_2]], [[S_LOAD_DWORD_IMM4]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_12:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_MUL_LO_U32_e64_14]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE53:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MUL_LO_U32_e64_14]], %subreg.sub0, [[V_ASHRREV_I32_e64_12]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_12:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_36]], killed [[REG_SEQUENCE53]], implicit $exec
+ ; CHECK-NEXT: [[COPY186:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub0
+ ; CHECK-NEXT: [[COPY187:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_12]].sub0
+ ; CHECK-NEXT: [[COPY188:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE26]].sub1
+ ; CHECK-NEXT: [[COPY189:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_12]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_26:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_27:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY186]], [[COPY187]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY190:%[0-9]+]]:vgpr_32 = COPY [[COPY188]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_26:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_27:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY190]], [[COPY189]], killed [[V_ADD_CO_U32_e64_27]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE54:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_26]], %subreg.sub0, [[V_ADDC_U32_e64_26]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD24:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE54]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.304, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.30.Flow81:
+ ; CHECK-NEXT: successors: %bb.31(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI41:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_1]], %bb.28, [[GLOBAL_LOAD_DWORD24]], %bb.29
+ ; CHECK-NEXT: SI_END_CF [[SI_IF11]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.31.Flow82:
+ ; CHECK-NEXT: successors: %bb.32(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI42:%[0-9]+]]:vgpr_32 = PHI [[COPY179]], %bb.27, [[GLOBAL_LOAD_DWORD23]], %bb.30
+ ; CHECK-NEXT: [[PHI43:%[0-9]+]]:vgpr_32 = PHI [[COPY180]], %bb.27, [[PHI41]], %bb.30
+ ; CHECK-NEXT: SI_END_CF [[SI_IF10]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.32.Flow83:
+ ; CHECK-NEXT: successors: %bb.34(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI44:%[0-9]+]]:vgpr_32 = PHI [[COPY171]], %bb.26, [[GLOBAL_LOAD_DWORD22]], %bb.31
+ ; CHECK-NEXT: [[PHI45:%[0-9]+]]:vgpr_32 = PHI [[COPY172]], %bb.26, [[PHI42]], %bb.31
+ ; CHECK-NEXT: [[PHI46:%[0-9]+]]:vgpr_32 = PHI [[COPY173]], %bb.26, [[PHI43]], %bb.31
+ ; CHECK-NEXT: SI_END_CF [[SI_IF9]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.34
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.33.Flow84:
+ ; CHECK-NEXT: successors: %bb.43(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI47:%[0-9]+]]:vgpr_32 = PHI [[PHI11]], %bb.24, %157, %bb.42
+ ; CHECK-NEXT: [[PHI48:%[0-9]+]]:vgpr_32 = PHI [[PHI12]], %bb.24, %158, %bb.42
+ ; CHECK-NEXT: [[PHI49:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.24, %159, %bb.42
+ ; CHECK-NEXT: [[PHI50:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.24, %160, %bb.42
+ ; CHECK-NEXT: S_BRANCH %bb.43
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.34 (%ir-block.316):
+ ; CHECK-NEXT: successors: %bb.35(0x40000000), %bb.42(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI51:%[0-9]+]]:vgpr_32 = PHI [[COPY162]], %bb.25, [[PHI46]], %bb.32
+ ; CHECK-NEXT: [[PHI52:%[0-9]+]]:vgpr_32 = PHI [[COPY163]], %bb.25, [[PHI45]], %bb.32
+ ; CHECK-NEXT: [[PHI53:%[0-9]+]]:vgpr_32 = PHI [[COPY164]], %bb.25, [[PHI44]], %bb.32
+ ; CHECK-NEXT: [[PHI54:%[0-9]+]]:vgpr_32 = PHI [[COPY165]], %bb.25, [[GLOBAL_LOAD_DWORD21]], %bb.32
+ ; CHECK-NEXT: SI_END_CF [[SI_IF8]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_11:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_ADD_U32_e64_]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF12:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_11]], %bb.42, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.35
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.35 (%ir-block.321):
+ ; CHECK-NEXT: successors: %bb.36(0x40000000), %bb.41(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY191:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_LT_I32_e64_7]]
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_15:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[PHI10]], [[S_LOAD_DWORD_IMM3]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY192:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_38]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY192]], 0, killed [[V_MUL_LO_U32_e64_15]], [[COPY191]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 killed [[V_CNDMASK_B32_e64_]], [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_13:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_17]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE55:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_17]], %subreg.sub0, [[V_ASHRREV_I32_e64_13]], %subreg.sub1
+ ; CHECK-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_13:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_39]], killed [[REG_SEQUENCE55]], implicit $exec
+ ; CHECK-NEXT: [[COPY193:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY194:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_13]].sub0
+ ; CHECK-NEXT: [[COPY195:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY196:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_13]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_28:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_29:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY193]], [[COPY194]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY197:%[0-9]+]]:vgpr_32 = COPY [[COPY195]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_28:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_29:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY197]], [[COPY196]], killed [[V_ADD_CO_U32_e64_29]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE56:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_28]], %subreg.sub0, [[V_ADDC_U32_e64_28]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD25:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.326, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_16:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD25]], 0, [[PHI11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; CHECK-NEXT: [[V_OR_B32_e64_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI10]], killed [[S_MOV_B32_40]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_16:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_OR_B32_e64_3]], [[S_LOAD_DWORD_IMM3]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_12:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I32_e64 [[V_OR_B32_e64_3]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[COPY198:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_38]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY198]], 0, killed [[V_MUL_LO_U32_e64_16]], killed [[V_CMP_LT_I32_e64_12]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 killed [[V_CNDMASK_B32_e64_1]], [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_14:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_18]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE57:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_18]], %subreg.sub0, [[V_ASHRREV_I32_e64_14]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_14:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_39]], killed [[REG_SEQUENCE57]], implicit $exec
+ ; CHECK-NEXT: [[COPY199:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY200:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_14]].sub0
+ ; CHECK-NEXT: [[COPY201:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY202:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_14]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_30:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_31:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY199]], [[COPY200]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY203:%[0-9]+]]:vgpr_32 = COPY [[COPY201]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_30:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_31:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY203]], [[COPY202]], killed [[V_ADD_CO_U32_e64_31]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE58:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_30]], %subreg.sub0, [[V_ADDC_U32_e64_30]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD26:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.336, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_17:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD26]], 0, [[V_FMAC_F32_e64_16]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e64_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI10]], [[S_MOV_B32_39]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_17:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_OR_B32_e64_4]], [[S_LOAD_DWORD_IMM3]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_13:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I32_e64 [[V_OR_B32_e64_4]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[COPY204:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_38]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY204]], 0, killed [[V_MUL_LO_U32_e64_17]], killed [[V_CMP_LT_I32_e64_13]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 killed [[V_CNDMASK_B32_e64_2]], [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_15:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_19]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE59:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_19]], %subreg.sub0, [[V_ASHRREV_I32_e64_15]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_15:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_39]], killed [[REG_SEQUENCE59]], implicit $exec
+ ; CHECK-NEXT: [[COPY205:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY206:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_15]].sub0
+ ; CHECK-NEXT: [[COPY207:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY208:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_15]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_32:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_33:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY205]], [[COPY206]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY209:%[0-9]+]]:vgpr_32 = COPY [[COPY207]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_32:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_33:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY209]], [[COPY208]], killed [[V_ADD_CO_U32_e64_33]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE60:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_32]], %subreg.sub0, [[V_ADDC_U32_e64_32]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD27:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.346, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_18:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD27]], 0, [[V_FMAC_F32_e64_17]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sreg_32 = S_MOV_B32 3
+ ; CHECK-NEXT: [[V_OR_B32_e64_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI10]], killed [[S_MOV_B32_41]], implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_18:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_OR_B32_e64_5]], [[S_LOAD_DWORD_IMM3]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_14:%[0-9]+]]:sreg_64_xexec = V_CMP_LT_I32_e64 [[V_OR_B32_e64_5]], [[COPY91]], implicit $exec
+ ; CHECK-NEXT: [[COPY210:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_38]]
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY210]], 0, killed [[V_MUL_LO_U32_e64_18]], killed [[V_CMP_LT_I32_e64_14]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 killed [[V_CNDMASK_B32_e64_3]], [[V_ADD_U32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_16:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_20]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE61:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_20]], %subreg.sub0, [[V_ASHRREV_I32_e64_16]], %subreg.sub1
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_16:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 [[S_MOV_B32_39]], killed [[REG_SEQUENCE61]], implicit $exec
+ ; CHECK-NEXT: [[COPY211:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub0
+ ; CHECK-NEXT: [[COPY212:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_16]].sub0
+ ; CHECK-NEXT: [[COPY213:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE23]].sub1
+ ; CHECK-NEXT: [[COPY214:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_16]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_34:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_35:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY211]], [[COPY212]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY215:%[0-9]+]]:vgpr_32 = COPY [[COPY213]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_34:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_35:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY215]], [[COPY214]], killed [[V_ADD_CO_U32_e64_35]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE62:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_34]], %subreg.sub0, [[V_ADDC_U32_e64_34]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD28:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.356, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_19:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD28]], 0, [[V_FMAC_F32_e64_18]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sreg_32 = S_MOV_B32 64
+ ; CHECK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_42]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_15:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_21]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF13:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_15]], %bb.41, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.36
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.36 (%ir-block.365):
+ ; CHECK-NEXT: successors: %bb.37(0x40000000), %bb.40(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD29:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.366, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_20:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD29]], 0, [[PHI12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD30:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.370, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_21:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD30]], 0, [[V_FMAC_F32_e64_20]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD31:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.374, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_22:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD31]], 0, [[V_FMAC_F32_e64_21]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD32:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.378, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_23:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD32]], 0, [[V_FMAC_F32_e64_22]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sreg_32 = S_MOV_B32 128
+ ; CHECK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_43]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_16:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_22]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF14:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_16]], %bb.40, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.37
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.37 (%ir-block.387):
+ ; CHECK-NEXT: successors: %bb.38(0x40000000), %bb.39(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD33:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.388, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_24:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD33]], 0, [[PHI13]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD34:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.392, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_25:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD34]], 0, [[V_FMAC_F32_e64_24]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD35:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.396, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_26:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD35]], 0, [[V_FMAC_F32_e64_25]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD36:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.400, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_27:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD36]], 0, [[V_FMAC_F32_e64_26]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sreg_32 = S_MOV_B32 192
+ ; CHECK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_44]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_17:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_23]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF15:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_17]], %bb.39, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.38
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.38 (%ir-block.409):
+ ; CHECK-NEXT: successors: %bb.39(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD37:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.410, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_28:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD37]], 0, [[PHI14]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD38:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.414, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_29:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD38]], 0, [[V_FMAC_F32_e64_28]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD39:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.418, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_30:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD39]], 0, [[V_FMAC_F32_e64_29]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD40:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.422, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_31:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD40]], 0, [[V_FMAC_F32_e64_30]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.39.Flow77:
+ ; CHECK-NEXT: successors: %bb.40(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI55:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.37, [[V_FMAC_F32_e64_31]], %bb.38
+ ; CHECK-NEXT: SI_END_CF [[SI_IF15]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.40.Flow78:
+ ; CHECK-NEXT: successors: %bb.41(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI56:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.36, [[V_FMAC_F32_e64_27]], %bb.39
+ ; CHECK-NEXT: [[PHI57:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.36, [[PHI55]], %bb.39
+ ; CHECK-NEXT: SI_END_CF [[SI_IF14]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.41.Flow79:
+ ; CHECK-NEXT: successors: %bb.42(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI58:%[0-9]+]]:vgpr_32 = PHI [[PHI12]], %bb.35, [[V_FMAC_F32_e64_23]], %bb.40
+ ; CHECK-NEXT: [[PHI59:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.35, [[PHI56]], %bb.40
+ ; CHECK-NEXT: [[PHI60:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.35, [[PHI57]], %bb.40
+ ; CHECK-NEXT: SI_END_CF [[SI_IF13]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.42.Flow80:
+ ; CHECK-NEXT: successors: %bb.33(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI61:%[0-9]+]]:vgpr_32 = PHI [[PHI11]], %bb.34, [[V_FMAC_F32_e64_19]], %bb.41
+ ; CHECK-NEXT: [[PHI62:%[0-9]+]]:vgpr_32 = PHI [[PHI12]], %bb.34, [[PHI58]], %bb.41
+ ; CHECK-NEXT: [[PHI63:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.34, [[PHI59]], %bb.41
+ ; CHECK-NEXT: [[PHI64:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.34, [[PHI60]], %bb.41
+ ; CHECK-NEXT: SI_END_CF [[SI_IF12]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.33
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.43 (%ir-block.436):
+ ; CHECK-NEXT: successors: %bb.44(0x40000000), %bb.45(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sreg_32 = S_MOV_B32 8
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_3:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 killed [[S_MOV_B32_45]], [[V_BFE_U32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_ADD_LSHL_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 killed [[V_LSHLREV_B32_e64_3]], [[V_AND_B32_e64_]], [[S_MOV_B32_46]], implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[V_ADD_LSHL_U32_e64_]], [[PHI47]], 0, 0, implicit $exec :: (store (s32) into %ir.439, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[V_ADD_LSHL_U32_e64_]], [[PHI48]], 256, 0, implicit $exec :: (store (s32) into %ir.440, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[V_ADD_LSHL_U32_e64_]], [[PHI49]], 512, 0, implicit $exec :: (store (s32) into %ir.441, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[V_ADD_LSHL_U32_e64_]], [[PHI50]], 768, 0, implicit $exec :: (store (s32) into %ir.442, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: ATOMIC_FENCE 5, 3
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: ATOMIC_FENCE 4, 3
+ ; CHECK-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sreg_32 = S_MOV_B32 256
+ ; CHECK-NEXT: [[V_CMP_LT_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_LT_U32_e64 [[V_MAD_U32_U24_e64_]], killed [[S_MOV_B32_47]], implicit $exec
+ ; CHECK-NEXT: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY216:%[0-9]+]]:vgpr_32 = COPY [[DEF7]]
+ ; CHECK-NEXT: [[COPY217:%[0-9]+]]:vreg_64_align2 = COPY [[DEF6]]
+ ; CHECK-NEXT: [[SI_IF16:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_U32_e64_1]], %bb.45, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.44
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.44..preheader.i:
+ ; CHECK-NEXT: successors: %bb.46(0x40000000), %bb.50(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_4:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[S_MOV_B32_46]], [[V_MAD_U32_U24_e64_]], implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 0, 0, implicit $exec :: (load (s32) from %ir.447, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 1024, 0, implicit $exec :: (load (s32) from %ir.448, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_]], 0, killed [[DS_READ_B32_gfx9_1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 2048, 0, implicit $exec :: (load (s32) from %ir.451, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_2]], 0, killed [[V_ADD_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 3072, 0, implicit $exec :: (load (s32) from %ir.454, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_3]], 0, killed [[V_ADD_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 4096, 0, implicit $exec :: (load (s32) from %ir.457, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_4]], 0, killed [[V_ADD_F32_e64_2]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_5:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 5120, 0, implicit $exec :: (load (s32) from %ir.460, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_5]], 0, killed [[V_ADD_F32_e64_3]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_6:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 6144, 0, implicit $exec :: (load (s32) from %ir.463, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_6]], 0, killed [[V_ADD_F32_e64_4]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_7:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 7168, 0, implicit $exec :: (load (s32) from %ir.466, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_6:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_7]], 0, killed [[V_ADD_F32_e64_5]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_8:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 8192, 0, implicit $exec :: (load (s32) from %ir.469, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_7:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_8]], 0, killed [[V_ADD_F32_e64_6]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_9:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 9216, 0, implicit $exec :: (load (s32) from %ir.472, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_8:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_9]], 0, killed [[V_ADD_F32_e64_7]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_10:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 10240, 0, implicit $exec :: (load (s32) from %ir.475, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_9:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_10]], 0, killed [[V_ADD_F32_e64_8]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_11:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 11264, 0, implicit $exec :: (load (s32) from %ir.478, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_10:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_11]], 0, killed [[V_ADD_F32_e64_9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_12:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 12288, 0, implicit $exec :: (load (s32) from %ir.481, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_11:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_12]], 0, killed [[V_ADD_F32_e64_10]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_13:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 13312, 0, implicit $exec :: (load (s32) from %ir.484, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_12:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_13]], 0, killed [[V_ADD_F32_e64_11]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_14:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 14336, 0, implicit $exec :: (load (s32) from %ir.487, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_13:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_14]], 0, killed [[V_ADD_F32_e64_12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[DS_READ_B32_gfx9_15:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_LSHLREV_B32_e64_4]], 15360, 0, implicit $exec :: (load (s32) from %ir.490, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_14:%[0-9]+]]:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed [[DS_READ_B32_gfx9_15]], 0, killed [[V_ADD_F32_e64_13]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[V_LSHLREV_B32_e64_4]], [[V_ADD_F32_e64_14]], 0, 0, implicit $exec :: (store (s32) into %ir.447, !tbaa !13, addrspace 3)
+ ; CHECK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MAD_U32_U24_e64_]], [[S_LSHL_B32_]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_LT_I32_e64_18:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 [[V_ADD_U32_e64_24]], [[COPY28]], implicit $exec
+ ; CHECK-NEXT: [[DEF8:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF9:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY218:%[0-9]+]]:vgpr_32 = COPY [[DEF9]]
+ ; CHECK-NEXT: [[COPY219:%[0-9]+]]:vreg_64_align2 = COPY [[DEF8]]
+ ; CHECK-NEXT: [[SI_IF17:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_LT_I32_e64_18]], %bb.50, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.46
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.45.Flow94:
+ ; CHECK-NEXT: successors: %bb.23(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI65:%[0-9]+]]:sreg_64 = PHI [[PHI]], %bb.43, %674, %bb.50
+ ; CHECK-NEXT: [[PHI66:%[0-9]+]]:vgpr_32 = PHI [[COPY216]], %bb.43, %173, %bb.50
+ ; CHECK-NEXT: [[PHI67:%[0-9]+]]:vreg_64_align2 = PHI [[COPY217]], %bb.43, %174, %bb.50
+ ; CHECK-NEXT: SI_END_CF [[SI_IF16]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.23
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.46 (%ir-block.501):
+ ; CHECK-NEXT: successors: %bb.47(0x50000000), %bb.49(0x30000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY220:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_48]]
+ ; CHECK-NEXT: [[V_CMP_EQ_F32_e64_3:%[0-9]+]]:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, [[S_LOAD_DWORD_IMM1]], 0, [[COPY220]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_MUL_F32_e64 0, [[V_ADD_F32_e64_14]], 0, [[COPY23]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_LO_U32_e64_19:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[V_ADD_U32_e64_24]], [[S_LOAD_DWORD_IMM2]], implicit $exec
+ ; CHECK-NEXT: [[V_ASHRREV_I32_e64_17:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_MUL_LO_U32_e64_19]], implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE63:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_MUL_LO_U32_e64_19]], %subreg.sub0, [[V_ASHRREV_I32_e64_17]], %subreg.sub1
+ ; CHECK-NEXT: [[S_AND_B64_6:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, killed [[V_CMP_EQ_F32_e64_3]], implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = COPY [[S_AND_B64_6]]
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.49, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.47
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.47 (%ir-block.506):
+ ; CHECK-NEXT: successors: %bb.49(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_17:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 killed [[S_MOV_B32_49]], [[REG_SEQUENCE63]], implicit $exec
+ ; CHECK-NEXT: [[COPY221:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE14]].sub0
+ ; CHECK-NEXT: [[COPY222:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_17]].sub0
+ ; CHECK-NEXT: [[COPY223:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE14]].sub1
+ ; CHECK-NEXT: [[COPY224:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_17]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_36:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_37:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY221]], [[COPY222]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY225:%[0-9]+]]:vgpr_32 = COPY [[COPY223]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_36:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_37:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY225]], [[COPY224]], killed [[V_ADD_CO_U32_e64_37]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE64:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_36]], %subreg.sub0, [[V_ADDC_U32_e64_36]], %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD41:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[REG_SEQUENCE64]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.507, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_32:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, killed [[GLOBAL_LOAD_DWORD41]], 0, [[S_LOAD_DWORD_IMM1]], 0, [[V_MUL_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.49
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.48.Flow:
+ ; CHECK-NEXT: successors: %bb.9(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI68:%[0-9]+]]:vgpr_32 = PHI [[COPY53]], %bb.7, [[V_MUL_F32_e64_]], %bb.8
+ ; CHECK-NEXT: [[COPY226:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE15]]
+ ; CHECK-NEXT: [[COPY227:%[0-9]+]]:sreg_64 = COPY $exec
+ ; CHECK-NEXT: S_BRANCH %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.49.Flow76:
+ ; CHECK-NEXT: successors: %bb.50(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI69:%[0-9]+]]:vgpr_32 = PHI [[V_MUL_F32_e64_1]], %bb.46, [[V_FMAC_F32_e64_32]], %bb.47
+ ; CHECK-NEXT: [[COPY228:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE63]]
+ ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.50.Flow95:
+ ; CHECK-NEXT: successors: %bb.45(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI70:%[0-9]+]]:sreg_64 = PHI [[PHI]], %bb.44, [[S_OR_B64_]], %bb.49
+ ; CHECK-NEXT: [[PHI71:%[0-9]+]]:vgpr_32 = PHI [[COPY218]], %bb.44, [[PHI69]], %bb.49
+ ; CHECK-NEXT: [[PHI72:%[0-9]+]]:vreg_64_align2 = PHI [[COPY219]], %bb.44, [[COPY228]], %bb.49
+ ; CHECK-NEXT: SI_END_CF [[SI_IF17]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_AND_B64_7:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI70]], $exec, implicit-def $scc
+ ; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_7]], implicit-def $scc
+ ; CHECK-NEXT: S_BRANCH %bb.45
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.51..sink.split.i:
+ ; CHECK-NEXT: successors: %bb.52(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sreg_32 = S_MOV_B32 2
+ ; CHECK-NEXT: [[V_LSHLREV_B64_e64_18:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 killed [[S_MOV_B32_50]], [[PHI39]], implicit $exec
+ ; CHECK-NEXT: [[COPY229:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE14]].sub0
+ ; CHECK-NEXT: [[COPY230:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_18]].sub0
+ ; CHECK-NEXT: [[COPY231:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY [[REG_SEQUENCE14]].sub1
+ ; CHECK-NEXT: [[COPY232:%[0-9]+]]:vgpr_32 = COPY [[V_LSHLREV_B64_e64_18]].sub1
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_38:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_39:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY229]], [[COPY230]], 0, implicit $exec
+ ; CHECK-NEXT: [[COPY233:%[0-9]+]]:vgpr_32 = COPY [[COPY231]]
+ ; CHECK-NEXT: [[V_ADDC_U32_e64_38:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_39:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY233]], [[COPY232]], killed [[V_ADD_CO_U32_e64_39]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE65:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_38]], %subreg.sub0, [[V_ADDC_U32_e64_38]], %subreg.sub1
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[REG_SEQUENCE65]], [[PHI38]], 0, 0, implicit $exec :: (store (s32) into %ir.516, !tbaa !13, addrspace 1)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.52.Flow96:
+ ; CHECK-NEXT: successors: %bb.53(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_END_CF [[SI_IF7]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.53.Flow97:
+ ; CHECK-NEXT: successors: %bb.54(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.54._Z25rocblas_gemvn_kernel_calcILi64ELi16EiffLi0EEviiT3_PKT2_T1_S3_iS0_PS1_i.exit:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0 (%ir-block.18):
+ successors: %bb.1(0x40000000), %bb.54(0x40000000)
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8
+
+ %181:sgpr_32 = COPY $sgpr8
+ %180:sgpr_32 = COPY $sgpr7
+ %179:sgpr_32 = COPY $sgpr6
+ %178:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ %176:vgpr_32(s32) = COPY $vgpr0
+ %183:sreg_32 = S_MOV_B32 0
+ %184:sreg_32 = S_MOV_B32 136
+ %185:sreg_64 = REG_SEQUENCE killed %184, %subreg.sub0, %183, %subreg.sub1
+ %682:sreg_32 = COPY %178.sub0(p4)
+ %683:sreg_32 = COPY %178.sub1(p4)
+ %684:sreg_32 = COPY %185.sub0
+ %685:sreg_32 = COPY %185.sub1
+ %680:sreg_32 = S_ADD_U32 %682, %684, implicit-def $scc
+ %681:sreg_32 = S_ADDC_U32 %683, %685, implicit-def $scc, implicit $scc
+ %186:sreg_64 = REG_SEQUENCE %680, %subreg.sub0, %681, %subreg.sub1
+ %187:sreg_64_xexec = S_LOAD_DWORDX2_IMM %178(p4), 136, 0 :: (invariant load (s64) from %ir.20, align 4, addrspace 4)
+ %188:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %178(p4), 144, 0 :: (invariant load (s32) from %ir.20 + 8, addrspace 4)
+ %189:sreg_32 = COPY %187.sub1
+ %190:sreg_32 = COPY %187.sub0
+ S_CMP_LT_U32 %179, killed %190, implicit-def $scc
+ %191:sreg_32 = S_MOV_B32 18
+ %192:sreg_32 = S_MOV_B32 12
+ %193:sreg_32 = S_CSELECT_B32 killed %192, killed %191, implicit $scc
+ %194:sreg_64 = REG_SEQUENCE killed %193, %subreg.sub0, %183, %subreg.sub1
+ %688:sreg_32 = COPY %186.sub0
+ %689:sreg_32 = COPY %186.sub1
+ %690:sreg_32 = COPY %194.sub0
+ %691:sreg_32 = COPY %194.sub1
+ %686:sreg_32 = S_ADD_U32 %688, %690, implicit-def $scc
+ %687:sreg_32 = S_ADDC_U32 %689, %691, implicit-def $scc, implicit $scc
+ %195:sreg_64 = REG_SEQUENCE %686, %subreg.sub0, %687, %subreg.sub1
+ %196:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %197:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed %195, %196, 0, 0, implicit $exec :: (invariant load (s16) from %ir.27, !tbaa !10, addrspace 4)
+ S_CMP_LT_U32 %180, killed %189, implicit-def $scc
+ %198:sreg_32 = S_MOV_B32 20
+ %199:sreg_32 = S_MOV_B32 14
+ %200:sreg_32 = S_CSELECT_B32 killed %199, killed %198, implicit $scc
+ %201:sreg_64 = REG_SEQUENCE killed %200, %subreg.sub0, %183, %subreg.sub1
+ %694:sreg_32 = COPY %186.sub0
+ %695:sreg_32 = COPY %186.sub1
+ %696:sreg_32 = COPY %201.sub0
+ %697:sreg_32 = COPY %201.sub1
+ %692:sreg_32 = S_ADD_U32 %694, %696, implicit-def $scc
+ %693:sreg_32 = S_ADDC_U32 %695, %697, implicit-def $scc, implicit $scc
+ %202:sreg_64 = REG_SEQUENCE %692, %subreg.sub0, %693, %subreg.sub1
+ %203:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed %202, %196, 0, 0, implicit $exec :: (invariant load (s16) from %ir.33, !tbaa !10, addrspace 4)
+ %631:vgpr_32 = nuw V_MUL_LO_U32_e64 %203, %197, implicit $exec
+ S_CMP_LT_U32 %181, killed %188, implicit-def $scc
+ %207:sreg_32 = S_MOV_B32 22
+ %208:sreg_32 = S_MOV_B32 16
+ %209:sreg_32 = S_CSELECT_B32 killed %208, killed %207, implicit $scc
+ %210:sreg_64 = REG_SEQUENCE killed %209, %subreg.sub0, %183, %subreg.sub1
+ %700:sreg_32 = COPY %186.sub0
+ %701:sreg_32 = COPY %186.sub1
+ %702:sreg_32 = COPY %210.sub0
+ %703:sreg_32 = COPY %210.sub1
+ %698:sreg_32 = S_ADD_U32 %700, %702, implicit-def $scc
+ %699:sreg_32 = S_ADDC_U32 %701, %703, implicit-def $scc, implicit $scc
+ %211:sreg_64 = REG_SEQUENCE %698, %subreg.sub0, %699, %subreg.sub1
+ %212:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR killed %211, %196, 0, 0, implicit $exec :: (invariant load (s16) from %ir.40, !tbaa !10, addrspace 4)
+ %632:vgpr_32 = V_MUL_LO_U32_e64 killed %631, %212, implicit $exec
+ %215:sreg_32 = S_MOV_B32 1024
+ %633:sreg_64_xexec = V_CMP_NE_U32_e64 killed %632, killed %215, implicit $exec
+ $vcc = S_AND_B64 $exec, %633, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.54, implicit $vcc
+ S_BRANCH %bb.1
+
+ bb.1 (%ir-block.44):
+ successors: %bb.2(0x40000000), %bb.53(0x40000000)
+
+ %216:sgpr_128 = S_LOAD_DWORDX4_IMM %178(p4), 0, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset65, addrspace 4)
+ %5:sgpr_32 = COPY %216.sub2
+ %217:sreg_32 = COPY %216.sub1
+ %218:sreg_32 = COPY %216.sub0
+ %219:sgpr_96 = REG_SEQUENCE killed %218, %subreg.sub0, killed %217, %subreg.sub1, %5, %subreg.sub2
+ %4:sgpr_96 = COPY %219
+ %220:sgpr_32 = S_LOAD_DWORD_IMM %178(p4), 88, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset55, align 8, addrspace 4)
+ %221:sgpr_32 = S_MOV_B32 0
+ %223:vgpr_32 = COPY killed %221
+ %222:sreg_64 = contract nofpexcept V_CMP_EQ_F32_e64 0, %5, 0, %223, 0, implicit $mode, implicit $exec
+ %224:sgpr_32 = S_MOV_B32 1065353216
+ %226:vgpr_32 = COPY killed %224
+ %225:sreg_64 = contract nofpexcept V_CMP_EQ_F32_e64 0, %220, 0, %226, 0, implicit $mode, implicit $exec
+ %227:sreg_64 = S_AND_B64 killed %222, killed %225, implicit-def dead $scc
+ %228:sreg_64 = S_AND_B64 $exec, killed %227, implicit-def dead $scc
+ $vcc = COPY %228
+ S_CBRANCH_VCCNZ %bb.53, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2 (%ir-block.49):
+ successors: %bb.3(0x50000000), %bb.4(0x30000000)
+
+ %7:sreg_32 = COPY %4.sub0
+ %233:sgpr_128 = S_LOAD_DWORDX4_IMM %178(p4), 104, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset57, align 8, addrspace 4)
+ %234:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %178(p4), 120, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset61, align 8, addrspace 4)
+ %235:sreg_64_xexec = S_LOAD_DWORDX2_IMM %178(p4), 128, 0 :: (dereferenceable invariant load (s64) from %ir..kernarg.offset63, align 16, addrspace 4)
+ %236:sreg_32 = COPY %233.sub1
+ %237:sreg_32 = COPY %233.sub0
+ %238:sreg_64 = REG_SEQUENCE killed %237, %subreg.sub0, killed %236, %subreg.sub1
+ %239:sreg_32 = COPY %233.sub3
+ %240:sreg_32 = COPY %233.sub2
+ %241:sreg_64 = REG_SEQUENCE killed %240, %subreg.sub0, killed %239, %subreg.sub1
+ %242:sreg_32 = COPY %235.sub1
+ %243:sreg_32_xm0 = S_ASHR_I32 %180, 31, implicit-def dead $scc
+ %245:sreg_64 = REG_SEQUENCE %180, %subreg.sub0, %243, %subreg.sub1
+ %246:sreg_32 = COPY %245.sub1
+ %9:sreg_64 = COPY %245
+ %247:sreg_32 = S_MUL_I32 %180, killed %242
+ %248:sreg_32 = COPY %235.sub0
+ %249:sreg_32 = S_MUL_HI_U32 %180, %248
+ %250:sreg_32 = S_ADD_I32 killed %249, killed %247, implicit-def dead $scc
+ %251:sreg_32 = S_MUL_I32 killed %246, %248
+ %252:sreg_32 = S_ADD_I32 killed %250, killed %251, implicit-def dead $scc
+ %253:sreg_32 = S_MUL_I32 %180, %248
+ %254:sreg_64 = REG_SEQUENCE killed %253, %subreg.sub0, killed %252, %subreg.sub1
+ %255:sreg_32 = S_MOV_B32 2
+ %256:sreg_64 = S_LSHL_B64 killed %254, %255, implicit-def dead $scc
+ %706:sreg_32 = COPY %238.sub0
+ %707:sreg_32 = COPY %238.sub1
+ %708:sreg_32 = COPY %256.sub0
+ %709:sreg_32 = COPY %256.sub1
+ %704:sreg_32 = S_ADD_U32 %706, %708, implicit-def $scc
+ %705:sreg_32 = S_ADDC_U32 %707, %709, implicit-def $scc, implicit $scc
+ %257:sreg_64 = REG_SEQUENCE %704, %subreg.sub0, %705, %subreg.sub1
+ %258:sreg_64 = S_LSHL_B64 killed %241, %255, implicit-def dead $scc
+ %712:sreg_32 = COPY %257.sub0
+ %713:sreg_32 = COPY %257.sub1
+ %714:sreg_32 = COPY %258.sub0
+ %715:sreg_32 = COPY %258.sub1
+ %710:sreg_32 = S_ADD_U32 %712, %714, implicit-def $scc
+ %711:sreg_32 = S_ADDC_U32 %713, %715, implicit-def $scc, implicit $scc
+ %10:sreg_64 = REG_SEQUENCE %710, %subreg.sub0, %711, %subreg.sub1
+ %259:sreg_32 = S_MOV_B32 1023
+ %11:vgpr_32 = V_AND_B32_e64 %176(s32), killed %259, implicit $exec
+ %12:vgpr_32 = V_BFE_U32_e64 %176(s32), 10, 10, implicit $exec
+ %13:vgpr_32 = nuw nsw V_MAD_U32_U24_e64 %12, %197, %11, 0, implicit $exec
+ %260:sgpr_32 = S_MOV_B32 0
+ %262:vgpr_32 = COPY killed %260
+ %261:sreg_64 = nofpexcept V_CMP_NEQ_F32_e64 0, %5, 0, %262, 0, implicit $mode, implicit $exec
+ %232:sreg_64 = S_MOV_B64 -1
+ %231:sreg_64 = S_MOV_B64 0
+ %230:sreg_64 = IMPLICIT_DEF
+ %229:sgpr_32 = IMPLICIT_DEF
+ %263:sreg_64 = S_AND_B64 $exec, killed %261, implicit-def dead $scc
+ $vcc = COPY %263
+ %634:vgpr_32 = COPY %229
+ %635:vreg_64_align2 = COPY %230
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3 (%ir-block.61):
+ successors: %bb.5(0x40000000), %bb.6(0x40000000)
+
+ %267:sreg_32 = S_MOV_B32 256
+ %268:sreg_64 = V_CMP_LT_U32_e64 %13, killed %267, implicit $exec
+ %266:sreg_64 = S_MOV_B64 0
+ %265:sreg_64 = IMPLICIT_DEF
+ %264:sgpr_32 = IMPLICIT_DEF
+ %637:vgpr_32 = COPY %264
+ %638:vreg_64_align2 = COPY %265
+ %14:sreg_64 = SI_IF killed %268, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4.Flow90:
+ successors: %bb.10(0x40000000), %bb.23(0x40000000)
+
+ %17:sreg_64 = PHI %231, %bb.2, %23, %bb.6
+ %15:vgpr_32 = PHI %634, %bb.2, %21, %bb.6
+ %16:vreg_64_align2 = PHI %635, %bb.2, %22, %bb.6
+ %18:sreg_64 = PHI %232, %bb.2, %266, %bb.6
+ %289:sreg_64 = S_AND_B64 $exec, %18, implicit-def dead $scc
+ $vcc = COPY %289
+ S_CBRANCH_VCCNZ %bb.10, implicit $vcc
+ S_BRANCH %bb.23
+
+ bb.5 (%ir-block.70):
+ successors: %bb.7(0x40000000), %bb.9(0x40000000)
+
+ %272:sreg_32 = S_MOV_B32 8
+ %273:vgpr_32 = COPY killed %272
+ %19:vgpr_32 = nuw V_LSHL_ADD_U32_e64 %179, %273, %13, implicit $exec
+ %274:sreg_64 = V_CMP_LT_I32_e64 %19, %7, implicit $exec
+ %271:sreg_64 = S_MOV_B64 0
+ %270:sreg_64 = IMPLICIT_DEF
+ %269:sgpr_32 = IMPLICIT_DEF
+ %640:vgpr_32 = COPY %269
+ %641:vreg_64_align2 = COPY %270
+ %20:sreg_64 = SI_IF killed %274, %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.7
+
+ bb.6.Flow91:
+ successors: %bb.4(0x80000000)
+
+ %23:sreg_64 = PHI %266, %bb.3, %669, %bb.9
+ %21:vgpr_32 = PHI %637, %bb.3, %26, %bb.9
+ %22:vreg_64_align2 = PHI %638, %bb.3, %27, %bb.9
+ SI_END_CF %14, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.7 (%ir-block.80):
+ successors: %bb.8(0x50000000), %bb.48(0x30000000)
+
+ %275:sgpr_32 = S_MOV_B32 0
+ %277:vgpr_32 = COPY %275
+ %276:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, %220, 0, %277, 0, implicit $mode, implicit $exec
+ %278:vgpr_32 = nsw V_MUL_LO_U32_e64 %19, %234, implicit $exec
+ %279:vgpr_32 = V_ASHRREV_I32_e64 31, %278, implicit $exec
+ %281:vreg_64_align2 = REG_SEQUENCE %278, %subreg.sub0, %279, %subreg.sub1
+ %24:vreg_64_align2 = COPY %281
+ %282:sreg_64 = S_AND_B64 $exec, killed %276, implicit-def dead $scc
+ $vcc = COPY %282
+ %664:vgpr_32 = COPY %275, implicit $exec
+ S_CBRANCH_VCCNZ %bb.48, implicit $vcc
+ S_BRANCH %bb.8
+
+ bb.8 (%ir-block.84):
+ successors: %bb.48(0x80000000)
+
+ %283:sreg_32 = S_MOV_B32 2
+ %284:vreg_64_align2 = V_LSHLREV_B64_e64 killed %283, %281, implicit $exec
+ %720:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %10.sub0
+ %721:vgpr_32 = COPY %284.sub0
+ %722:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %10.sub1
+ %723:vgpr_32 = COPY %284.sub1
+ %716:vgpr_32, %718:sreg_64_xexec = V_ADD_CO_U32_e64 %720, %721, 0, implicit $exec
+ %724:vgpr_32 = COPY %722
+ %717:vgpr_32, dead %719:sreg_64_xexec = V_ADDC_U32_e64 %724, %723, killed %718, 0, implicit $exec
+ %285:vreg_64_align2 = REG_SEQUENCE %716, %subreg.sub0, %717, %subreg.sub1
+ %286:vgpr_32 = GLOBAL_LOAD_DWORD killed %285, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.85, !tbaa !13, addrspace 1)
+ %25:vgpr_32 = contract nofpexcept V_MUL_F32_e64 0, killed %286, 0, %220, 0, 0, implicit $mode, implicit $exec
+ S_BRANCH %bb.48
+
+ bb.9.Flow92:
+ successors: %bb.6(0x80000000)
+
+ %28:sreg_64 = PHI %271, %bb.5, %672, %bb.48
+ %26:vgpr_32 = PHI %640, %bb.5, %171, %bb.48
+ %27:vreg_64_align2 = PHI %641, %bb.5, %24, %bb.48
+ SI_END_CF %20, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %670:sreg_64 = S_AND_B64 %28, $exec, implicit-def $scc
+ %669:sreg_64 = COPY %670
+ S_BRANCH %bb.6
+
+ bb.10 (%ir-block.91):
+ successors: %bb.11(0x40000000), %bb.12(0x40000000)
+
+ %291:sgpr_128 = S_LOAD_DWORDX4_IMM %178(p4), 24, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset39, align 8, addrspace 4)
+ %292:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %178(p4), 40, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset43, align 8, addrspace 4)
+ %293:sgpr_128 = S_LOAD_DWORDX4_IMM %178(p4), 48, 0 :: (dereferenceable invariant load (s128) from %ir..kernarg.offset45, addrspace 4)
+ %294:sreg_64_xexec = S_LOAD_DWORDX2_IMM %178(p4), 64, 0 :: (dereferenceable invariant load (s64) from %ir..kernarg.offset45 + 16, align 16, addrspace 4)
+ %295:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %178(p4), 72, 0 :: (dereferenceable invariant load (s32) from %ir..kernarg.offset51, align 8, addrspace 4)
+ %296:sreg_64_xexec = S_LOAD_DWORDX2_IMM %178(p4), 80, 0 :: (dereferenceable invariant load (s64) from %ir..kernarg.offset53, align 16, addrspace 4)
+ %297:sreg_32 = COPY %291.sub1
+ %298:sreg_32 = COPY %291.sub0
+ %299:sreg_64 = REG_SEQUENCE killed %298, %subreg.sub0, killed %297, %subreg.sub1
+ %300:sreg_32 = COPY %291.sub3
+ %301:sreg_32 = COPY %291.sub2
+ %302:sreg_64 = REG_SEQUENCE killed %301, %subreg.sub0, killed %300, %subreg.sub1
+ %303:sreg_32 = COPY %293.sub0
+ %304:sreg_32 = COPY %293.sub1
+ %305:sreg_32 = COPY %294.sub1
+ %306:sreg_32 = COPY %294.sub0
+ %307:sreg_32 = COPY %293.sub3
+ %308:sreg_32 = COPY %293.sub2
+ %309:sreg_64 = REG_SEQUENCE killed %308, %subreg.sub0, killed %307, %subreg.sub1
+ %29:sreg_64 = COPY %309
+ %310:sreg_64 = REG_SEQUENCE killed %306, %subreg.sub0, killed %305, %subreg.sub1
+ %311:sreg_32 = COPY %296.sub1
+ %312:sreg_32 = COPY %9.sub0
+ %313:sreg_32 = S_MUL_I32 %312, killed %304
+ %314:sreg_32 = S_MUL_HI_U32 %312, %303
+ %315:sreg_32 = S_ADD_I32 killed %314, killed %313, implicit-def dead $scc
+ %316:sreg_32 = COPY %9.sub1
+ %317:sreg_32 = S_MUL_I32 %316, %303
+ %318:sreg_32 = S_ADD_I32 killed %315, killed %317, implicit-def dead $scc
+ %319:sreg_32 = S_MUL_I32 %312, %303
+ %320:sreg_64 = REG_SEQUENCE killed %319, %subreg.sub0, killed %318, %subreg.sub1
+ %321:sreg_32 = S_MOV_B32 2
+ %322:sreg_64 = S_LSHL_B64 killed %320, %321, implicit-def dead $scc
+ %727:sreg_32 = COPY %299.sub0
+ %728:sreg_32 = COPY %299.sub1
+ %729:sreg_32 = COPY %322.sub0
+ %730:sreg_32 = COPY %322.sub1
+ %725:sreg_32 = S_ADD_U32 %727, %729, implicit-def $scc
+ %726:sreg_32 = S_ADDC_U32 %728, %730, implicit-def $scc, implicit $scc
+ %323:sreg_64 = REG_SEQUENCE %725, %subreg.sub0, %726, %subreg.sub1
+ %324:sreg_64 = S_LSHL_B64 killed %302, %321, implicit-def dead $scc
+ %733:sreg_32 = COPY %323.sub0
+ %734:sreg_32 = COPY %323.sub1
+ %735:sreg_32 = COPY %324.sub0
+ %736:sreg_32 = COPY %324.sub1
+ %731:sreg_32 = S_ADD_U32 %733, %735, implicit-def $scc
+ %732:sreg_32 = S_ADDC_U32 %734, %736, implicit-def $scc, implicit $scc
+ %31:sreg_64 = REG_SEQUENCE %731, %subreg.sub0, %732, %subreg.sub1
+ %325:sreg_32 = S_MUL_I32 %312, killed %311
+ %326:sreg_32 = COPY %296.sub0
+ %327:sreg_32 = S_MUL_HI_U32 %312, %326
+ %328:sreg_32 = S_ADD_I32 killed %327, killed %325, implicit-def dead $scc
+ %329:sreg_32 = S_MUL_I32 %316, %326
+ %330:sreg_32 = S_ADD_I32 killed %328, killed %329, implicit-def dead $scc
+ %331:sreg_32 = S_MUL_I32 %312, %326
+ %332:sreg_64 = REG_SEQUENCE killed %331, %subreg.sub0, killed %330, %subreg.sub1
+ %333:sreg_64 = S_LSHL_B64 %332, %321, implicit-def dead $scc
+ %739:sreg_32 = COPY %309.sub0
+ %740:sreg_32 = COPY %309.sub1
+ %741:sreg_32 = COPY %333.sub0
+ %742:sreg_32 = COPY %333.sub1
+ %737:sreg_32 = S_ADD_U32 %739, %741, implicit-def $scc
+ %738:sreg_32 = S_ADDC_U32 %740, %742, implicit-def $scc, implicit $scc
+ %334:sreg_64 = REG_SEQUENCE %737, %subreg.sub0, %738, %subreg.sub1
+ %335:sreg_64 = S_LSHL_B64 %310, %321, implicit-def dead $scc
+ %745:sreg_32 = COPY %334.sub0
+ %746:sreg_32 = COPY %334.sub1
+ %747:sreg_32 = COPY %335.sub0
+ %748:sreg_32 = COPY %335.sub1
+ %743:sreg_32 = S_ADD_U32 %745, %747, implicit-def $scc
+ %744:sreg_32 = S_ADDC_U32 %746, %748, implicit-def $scc, implicit $scc
+ %33:sreg_64 = REG_SEQUENCE %743, %subreg.sub0, %744, %subreg.sub1
+ %34:sreg_32 = COPY %4.sub1
+ %336:sreg_32 = S_MOV_B32 8
+ %37:sreg_32 = S_LSHL_B32 %179, killed %336, implicit-def dead $scc
+ %38:vgpr_32 = V_ADD_U32_e64 %37, %11, 0, implicit $exec
+ %337:sreg_32 = S_MOV_B32 31
+ %338:sreg_32 = S_ASHR_I32 %34, killed %337, implicit-def dead $scc
+ %339:sreg_32 = S_MOV_B32 26
+ %340:sreg_32 = S_LSHR_B32 killed %338, killed %339, implicit-def dead $scc
+ %341:sreg_32 = S_ADD_I32 %34, killed %340, implicit-def dead $scc
+ %342:sreg_32 = S_MOV_B32 -64
+ %41:sreg_32 = S_AND_B32 killed %341, killed %342, implicit-def dead $scc
+ %39:sreg_32 = S_SUB_I32 %34, %41, implicit-def dead $scc
+ %40:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 %321, %12, implicit $exec
+ %343:sreg_64 = V_CMP_LT_I32_e64 %40, %41, implicit $exec
+ %290:sgpr_32 = S_MOV_B32 0
+ %644:vgpr_32 = COPY %290, implicit $exec
+ %645:vgpr_32 = COPY %290, implicit $exec
+ %646:vgpr_32 = COPY %290, implicit $exec
+ %647:vgpr_32 = COPY %290, implicit $exec
+ %932:sreg_64 = V_CMP_LT_I32_e64 %38, %7, implicit $exec
+ %42:sreg_64 = SI_IF killed %343, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.11
+
+ bb.11..lr.ph.i:
+ successors: %bb.13(0x80000000)
+
+ %347:sreg_64 = V_CMP_LT_I32_e64 %38, %7, implicit $exec
+ %43:sreg_64 = COPY %347
+ %348:sreg_32 = S_MOV_B32 64
+ %349:vgpr_32 = nsw V_ADD_U32_e64 %38, %348, 0, implicit $exec
+ %350:sreg_64 = V_CMP_LT_I32_e64 killed %349, %7, implicit $exec
+ %44:sreg_64 = COPY %350
+ %351:sreg_32 = S_MOV_B32 128
+ %352:vgpr_32 = nsw V_ADD_U32_e64 %38, killed %351, 0, implicit $exec
+ %353:sreg_64 = V_CMP_LT_I32_e64 killed %352, %7, implicit $exec
+ %45:sreg_64 = COPY %353
+ %354:sreg_32 = S_MOV_B32 192
+ %355:vgpr_32 = nsw V_ADD_U32_e64 %38, killed %354, 0, implicit $exec
+ %356:sreg_64 = V_CMP_LT_I32_e64 killed %355, %7, implicit $exec
+ %46:sreg_64 = COPY %356
+ %357:vgpr_32 = V_MUL_LO_U32_e64 %292, %40, implicit $exec
+ %47:vgpr_32 = V_ADD_U32_e64 killed %357, %292, 0, implicit $exec
+ %358:sreg_32 = S_MOV_B32 6
+ %48:sreg_32 = S_LSHL_B32 %292, %358, implicit-def dead $scc
+ %360:vgpr_32 = nuw nsw V_ADD_U32_e64 %40, %321, 0, implicit $exec
+ %49:vgpr_32 = V_MUL_LO_U32_e64 %292, %360, implicit $exec
+ %361:sreg_32 = S_MOV_B32 3
+ %362:vgpr_32 = nuw nsw V_ADD_U32_e64 %40, killed %361, 0, implicit $exec
+ %50:vgpr_32 = V_MUL_LO_U32_e64 %292, %362, implicit $exec
+ %363:vgpr_32 = V_MUL_LO_U32_e64 %12, %292, implicit $exec
+ %51:vgpr_32 = V_LSHLREV_B32_e64 %321, killed %363, implicit $exec
+ %364:vgpr_32 = V_MUL_LO_U32_e64 %295, %40, implicit $exec
+ %52:vgpr_32 = V_ADD_U32_e64 killed %364, %295, 0, implicit $exec
+ %53:sreg_32 = S_LSHL_B32 %295, %358, implicit-def dead $scc
+ %54:vgpr_32 = V_MUL_LO_U32_e64 %295, %360, implicit $exec
+ %55:vgpr_32 = V_MUL_LO_U32_e64 %295, %362, implicit $exec
+ %751:sreg_32 = COPY %333.sub0
+ %752:sreg_32 = COPY %333.sub1
+ %753:sreg_32 = COPY %335.sub0
+ %754:sreg_32 = COPY %335.sub1
+ %749:sreg_32 = S_ADD_U32 %751, %753, implicit-def $scc
+ %750:sreg_32 = S_ADDC_U32 %752, %754, implicit-def $scc, implicit $scc
+ %367:sreg_64 = REG_SEQUENCE %749, %subreg.sub0, %750, %subreg.sub1
+ %368:vgpr_32 = V_MUL_LO_U32_e64 %12, %295, implicit $exec
+ %369:vgpr_32 = V_LSHLREV_B32_e64 %321, killed %368, implicit $exec
+ %370:vgpr_32 = V_ASHRREV_I32_e64 31, %369, implicit $exec
+ %372:vreg_64_align2 = REG_SEQUENCE %369, %subreg.sub0, %370, %subreg.sub1
+ %373:vreg_64_align2 = nsw V_LSHLREV_B64_e64 %321, killed %372, implicit $exec
+ %757:sreg_32 = COPY %29.sub0
+ %758:sreg_32 = COPY %29.sub1
+ %759:sreg_32 = COPY %367.sub0
+ %760:sreg_32 = COPY %367.sub1
+ %755:sreg_32 = S_ADD_U32 %757, %759, implicit-def $scc
+ %756:sreg_32 = S_ADDC_U32 %758, %760, implicit-def $scc, implicit $scc
+ %374:sreg_64 = REG_SEQUENCE %755, %subreg.sub0, %756, %subreg.sub1
+ %765:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %374.sub0
+ %766:vgpr_32 = COPY %373.sub0
+ %767:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %374.sub1
+ %768:vgpr_32 = COPY %373.sub1
+ %761:vgpr_32, %763:sreg_64_xexec = V_ADD_CO_U32_e64 %765, %766, 0, implicit $exec
+ %769:vgpr_32 = COPY %767
+ %762:vgpr_32, dead %764:sreg_64_xexec = V_ADDC_U32_e64 %769, %768, killed %763, 0, implicit $exec
+ %56:vreg_64_align2 = REG_SEQUENCE %761, %subreg.sub0, %762, %subreg.sub1
+ %375:sreg_32_xm0 = S_ASHR_I32 %53, 31, implicit-def dead $scc
+ %377:sreg_64 = REG_SEQUENCE %53, %subreg.sub0, %375, %subreg.sub1
+ %57:sreg_64 = nsw S_LSHL_B64 killed %377, %321, implicit-def dead $scc
+ %346:sgpr_32 = S_MOV_B32 0
+ %344:sreg_64 = S_MOV_B64 0
+ %648:vgpr_32 = COPY %346, implicit $exec
+ %649:vgpr_32 = COPY %346, implicit $exec
+ %650:vgpr_32 = COPY %346, implicit $exec
+ %651:vgpr_32 = COPY %346, implicit $exec
+ %378:sreg_64 = COPY %43
+ %425:sreg_64 = COPY %44
+ %433:sreg_64 = COPY %45
+ %441:sreg_64 = COPY %46
+ S_BRANCH %bb.13
+
+ bb.12.Flow89:
+ successors: %bb.24(0x80000000)
+
+ %58:vgpr_32 = PHI %40, %bb.10, %98, %bb.22
+ %59:vgpr_32 = PHI %644, %bb.10, %106, %bb.22
+ %60:vgpr_32 = PHI %645, %bb.10, %105, %bb.22
+ %61:vgpr_32 = PHI %646, %bb.10, %104, %bb.22
+ %62:vgpr_32 = PHI %647, %bb.10, %103, %bb.22
+ SI_END_CF %42, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.24
+
+ bb.13 (%ir-block.146):
+ successors: %bb.14(0x40000000), %bb.21(0x40000000)
+
+ %63:sreg_64 = PHI %344, %bb.11, %102, %bb.21
+ %64:vreg_64_align2 = PHI %56, %bb.11, %101, %bb.21
+ %65:sreg_32 = PHI %346, %bb.11, %100, %bb.21
+ %66:vgpr_32 = PHI %38, %bb.11, %99, %bb.21
+ %67:vgpr_32 = PHI %40, %bb.11, %98, %bb.21
+ %68:vgpr_32 = PHI %648, %bb.11, %97, %bb.21
+ %69:vgpr_32 = PHI %649, %bb.11, %96, %bb.21
+ %70:vgpr_32 = PHI %650, %bb.11, %95, %bb.21
+ %71:vgpr_32 = PHI %651, %bb.11, %94, %bb.21
+ %72:sreg_64 = SI_IF %378, %bb.21, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.14
+
+ bb.14 (%ir-block.150):
+ successors: %bb.15(0x40000000), %bb.20(0x40000000)
+
+ %73:vgpr_32 = GLOBAL_LOAD_DWORD %64, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.lsr.iv33, !tbaa !13, addrspace 1)
+ %379:vgpr_32 = V_ADD_U32_e64 %52, %65, 0, implicit $exec
+ %380:vgpr_32 = V_ASHRREV_I32_e64 31, %379, implicit $exec
+ %382:vreg_64_align2 = REG_SEQUENCE %379, %subreg.sub0, %380, %subreg.sub1
+ %384:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %382, implicit $exec
+ %774:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %775:vgpr_32 = COPY %384.sub0
+ %776:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %777:vgpr_32 = COPY %384.sub1
+ %770:vgpr_32, %772:sreg_64_xexec = V_ADD_CO_U32_e64 %774, %775, 0, implicit $exec
+ %778:vgpr_32 = COPY %776
+ %771:vgpr_32, dead %773:sreg_64_xexec = V_ADDC_U32_e64 %778, %777, killed %772, 0, implicit $exec
+ %385:vreg_64_align2 = REG_SEQUENCE %770, %subreg.sub0, %771, %subreg.sub1
+ %74:vgpr_32 = GLOBAL_LOAD_DWORD killed %385, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.154, !tbaa !13, addrspace 1)
+ %386:vgpr_32 = V_ADD_U32_e64 %54, %65, 0, implicit $exec
+ %387:vgpr_32 = V_ASHRREV_I32_e64 31, %386, implicit $exec
+ %389:vreg_64_align2 = REG_SEQUENCE %386, %subreg.sub0, %387, %subreg.sub1
+ %390:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %389, implicit $exec
+ %783:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %784:vgpr_32 = COPY %390.sub0
+ %785:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %786:vgpr_32 = COPY %390.sub1
+ %779:vgpr_32, %781:sreg_64_xexec = V_ADD_CO_U32_e64 %783, %784, 0, implicit $exec
+ %787:vgpr_32 = COPY %785
+ %780:vgpr_32, dead %782:sreg_64_xexec = V_ADDC_U32_e64 %787, %786, killed %781, 0, implicit $exec
+ %391:vreg_64_align2 = REG_SEQUENCE %779, %subreg.sub0, %780, %subreg.sub1
+ %75:vgpr_32 = GLOBAL_LOAD_DWORD killed %391, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.158, !tbaa !13, addrspace 1)
+ %392:vgpr_32 = V_ADD_U32_e64 %55, %65, 0, implicit $exec
+ %393:vgpr_32 = V_ASHRREV_I32_e64 31, %392, implicit $exec
+ %395:vreg_64_align2 = REG_SEQUENCE %392, %subreg.sub0, %393, %subreg.sub1
+ %396:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %395, implicit $exec
+ %792:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %793:vgpr_32 = COPY %396.sub0
+ %794:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %795:vgpr_32 = COPY %396.sub1
+ %788:vgpr_32, %790:sreg_64_xexec = V_ADD_CO_U32_e64 %792, %793, 0, implicit $exec
+ %796:vgpr_32 = COPY %794
+ %789:vgpr_32, dead %791:sreg_64_xexec = V_ADDC_U32_e64 %796, %795, killed %790, 0, implicit $exec
+ %397:vreg_64_align2 = REG_SEQUENCE %788, %subreg.sub0, %789, %subreg.sub1
+ %76:vgpr_32 = GLOBAL_LOAD_DWORD killed %397, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.162, !tbaa !13, addrspace 1)
+ %398:vgpr_32 = V_ADD_U32_e64 %51, %66, 0, implicit $exec
+ %399:vgpr_32 = V_ASHRREV_I32_e64 31, %398, implicit $exec
+ %401:vreg_64_align2 = REG_SEQUENCE %398, %subreg.sub0, %399, %subreg.sub1
+ %402:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %401, implicit $exec
+ %801:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %802:vgpr_32 = COPY %402.sub0
+ %803:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %804:vgpr_32 = COPY %402.sub1
+ %797:vgpr_32, %799:sreg_64_xexec = V_ADD_CO_U32_e64 %801, %802, 0, implicit $exec
+ %805:vgpr_32 = COPY %803
+ %798:vgpr_32, dead %800:sreg_64_xexec = V_ADDC_U32_e64 %805, %804, killed %799, 0, implicit $exec
+ %77:vreg_64_align2 = REG_SEQUENCE %797, %subreg.sub0, %798, %subreg.sub1
+ %403:vgpr_32 = GLOBAL_LOAD_DWORD %77, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.166, !tbaa !13, addrspace 1)
+ %404:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %73, 0, killed %403, 0, %68, 0, 0, implicit $mode, implicit $exec
+ %405:vgpr_32 = V_ADD_U32_e64 %47, %66, 0, implicit $exec
+ %406:vgpr_32 = V_ASHRREV_I32_e64 31, %405, implicit $exec
+ %408:vreg_64_align2 = REG_SEQUENCE %405, %subreg.sub0, %406, %subreg.sub1
+ %409:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %408, implicit $exec
+ %810:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %811:vgpr_32 = COPY %409.sub0
+ %812:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %813:vgpr_32 = COPY %409.sub1
+ %806:vgpr_32, %808:sreg_64_xexec = V_ADD_CO_U32_e64 %810, %811, 0, implicit $exec
+ %814:vgpr_32 = COPY %812
+ %807:vgpr_32, dead %809:sreg_64_xexec = V_ADDC_U32_e64 %814, %813, killed %808, 0, implicit $exec
+ %78:vreg_64_align2 = REG_SEQUENCE %806, %subreg.sub0, %807, %subreg.sub1
+ %410:vgpr_32 = GLOBAL_LOAD_DWORD %78, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.172, !tbaa !13, addrspace 1)
+ %411:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %74, 0, killed %410, 0, %404, 0, 0, implicit $mode, implicit $exec
+ %412:vgpr_32 = V_ADD_U32_e64 %49, %66, 0, implicit $exec
+ %413:vgpr_32 = V_ASHRREV_I32_e64 31, %412, implicit $exec
+ %415:vreg_64_align2 = REG_SEQUENCE %412, %subreg.sub0, %413, %subreg.sub1
+ %416:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %415, implicit $exec
+ %819:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %820:vgpr_32 = COPY %416.sub0
+ %821:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %822:vgpr_32 = COPY %416.sub1
+ %815:vgpr_32, %817:sreg_64_xexec = V_ADD_CO_U32_e64 %819, %820, 0, implicit $exec
+ %823:vgpr_32 = COPY %821
+ %816:vgpr_32, dead %818:sreg_64_xexec = V_ADDC_U32_e64 %823, %822, killed %817, 0, implicit $exec
+ %79:vreg_64_align2 = REG_SEQUENCE %815, %subreg.sub0, %816, %subreg.sub1
+ %417:vgpr_32 = GLOBAL_LOAD_DWORD %79, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.178, !tbaa !13, addrspace 1)
+ %418:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %75, 0, killed %417, 0, %411, 0, 0, implicit $mode, implicit $exec
+ %419:vgpr_32 = V_ADD_U32_e64 %50, %66, 0, implicit $exec
+ %420:vgpr_32 = V_ASHRREV_I32_e64 31, %419, implicit $exec
+ %422:vreg_64_align2 = REG_SEQUENCE %419, %subreg.sub0, %420, %subreg.sub1
+ %423:vreg_64_align2 = V_LSHLREV_B64_e64 %321, killed %422, implicit $exec
+ %828:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %829:vgpr_32 = COPY %423.sub0
+ %830:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %831:vgpr_32 = COPY %423.sub1
+ %824:vgpr_32, %826:sreg_64_xexec = V_ADD_CO_U32_e64 %828, %829, 0, implicit $exec
+ %832:vgpr_32 = COPY %830
+ %825:vgpr_32, dead %827:sreg_64_xexec = V_ADDC_U32_e64 %832, %831, killed %826, 0, implicit $exec
+ %80:vreg_64_align2 = REG_SEQUENCE %824, %subreg.sub0, %825, %subreg.sub1
+ %424:vgpr_32 = GLOBAL_LOAD_DWORD %80, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.184, !tbaa !13, addrspace 1)
+ %81:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %76, 0, killed %424, 0, %418, 0, 0, implicit $mode, implicit $exec
+ %82:sreg_64 = SI_IF %425, %bb.20, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.15
+
+ bb.15 (%ir-block.191):
+ successors: %bb.16(0x40000000), %bb.19(0x40000000)
+
+ %426:vgpr_32 = GLOBAL_LOAD_DWORD %77, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.192, !tbaa !13, addrspace 1)
+ %427:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %73, 0, killed %426, 0, %69, 0, 0, implicit $mode, implicit $exec
+ %428:vgpr_32 = GLOBAL_LOAD_DWORD %78, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.196, !tbaa !13, addrspace 1)
+ %429:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %74, 0, killed %428, 0, %427, 0, 0, implicit $mode, implicit $exec
+ %430:vgpr_32 = GLOBAL_LOAD_DWORD %79, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.200, !tbaa !13, addrspace 1)
+ %431:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %75, 0, killed %430, 0, %429, 0, 0, implicit $mode, implicit $exec
+ %432:vgpr_32 = GLOBAL_LOAD_DWORD %80, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.204, !tbaa !13, addrspace 1)
+ %83:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %76, 0, killed %432, 0, %431, 0, 0, implicit $mode, implicit $exec
+ %84:sreg_64 = SI_IF %433, %bb.19, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.16
+
+ bb.16 (%ir-block.211):
+ successors: %bb.17(0x40000000), %bb.18(0x40000000)
+
+ %434:vgpr_32 = GLOBAL_LOAD_DWORD %77, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.212, !tbaa !13, addrspace 1)
+ %435:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %73, 0, killed %434, 0, %70, 0, 0, implicit $mode, implicit $exec
+ %436:vgpr_32 = GLOBAL_LOAD_DWORD %78, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.216, !tbaa !13, addrspace 1)
+ %437:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %74, 0, killed %436, 0, %435, 0, 0, implicit $mode, implicit $exec
+ %438:vgpr_32 = GLOBAL_LOAD_DWORD %79, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.220, !tbaa !13, addrspace 1)
+ %439:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %75, 0, killed %438, 0, %437, 0, 0, implicit $mode, implicit $exec
+ %440:vgpr_32 = GLOBAL_LOAD_DWORD %80, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.224, !tbaa !13, addrspace 1)
+ %85:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %76, 0, killed %440, 0, %439, 0, 0, implicit $mode, implicit $exec
+ %86:sreg_64 = SI_IF %441, %bb.18, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.17
+
+ bb.17 (%ir-block.231):
+ successors: %bb.18(0x80000000)
+
+ %442:vgpr_32 = GLOBAL_LOAD_DWORD %77, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.232, !tbaa !13, addrspace 1)
+ %443:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %73, 0, killed %442, 0, %71, 0, 0, implicit $mode, implicit $exec
+ %444:vgpr_32 = GLOBAL_LOAD_DWORD %78, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.236, !tbaa !13, addrspace 1)
+ %445:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %74, 0, killed %444, 0, %443, 0, 0, implicit $mode, implicit $exec
+ %446:vgpr_32 = GLOBAL_LOAD_DWORD %79, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.240, !tbaa !13, addrspace 1)
+ %447:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %75, 0, killed %446, 0, %445, 0, 0, implicit $mode, implicit $exec
+ %448:vgpr_32 = GLOBAL_LOAD_DWORD %80, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.244, !tbaa !13, addrspace 1)
+ %87:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %76, 0, killed %448, 0, %447, 0, 0, implicit $mode, implicit $exec
+
+ bb.18.Flow85:
+ successors: %bb.19(0x80000000)
+
+ %88:vgpr_32 = PHI %71, %bb.16, %87, %bb.17
+ SI_END_CF %86, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.19.Flow86:
+ successors: %bb.20(0x80000000)
+
+ %89:vgpr_32 = PHI %70, %bb.15, %85, %bb.18
+ %90:vgpr_32 = PHI %71, %bb.15, %88, %bb.18
+ SI_END_CF %84, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.20.Flow87:
+ successors: %bb.21(0x80000000)
+
+ %91:vgpr_32 = PHI %69, %bb.14, %83, %bb.19
+ %92:vgpr_32 = PHI %70, %bb.14, %89, %bb.19
+ %93:vgpr_32 = PHI %71, %bb.14, %90, %bb.19
+ SI_END_CF %82, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.21 (%ir-block.254):
+ successors: %bb.22(0x04000000), %bb.13(0x7c000000)
+
+ %94:vgpr_32 = PHI %71, %bb.13, %93, %bb.20
+ %95:vgpr_32 = PHI %70, %bb.13, %92, %bb.20
+ %96:vgpr_32 = PHI %69, %bb.13, %91, %bb.20
+ %97:vgpr_32 = PHI %68, %bb.13, %81, %bb.20
+ SI_END_CF %72, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %98:vgpr_32 = nuw nsw V_ADD_U32_e64 %67, %348, 0, implicit $exec
+ %99:vgpr_32 = V_ADD_U32_e64 %66, %48, 0, implicit $exec
+ %100:sreg_32 = S_ADD_I32 %65, %53, implicit-def dead $scc
+ %837:vgpr_32 = COPY %64.sub0
+ %838:sreg_32_xm0 = COPY %57.sub0
+ %839:vgpr_32 = COPY %64.sub1
+ %840:sreg_32_xm0 = COPY %57.sub1
+ %833:vgpr_32, %835:sreg_64_xexec = V_ADD_CO_U32_e64 %837, %838, 0, implicit $exec
+ %841:vgpr_32 = COPY %840
+ %834:vgpr_32, dead %836:sreg_64_xexec = V_ADDC_U32_e64 %839, %841, killed %835, 0, implicit $exec
+ %101:vreg_64_align2 = REG_SEQUENCE %833, %subreg.sub0, %834, %subreg.sub1
+ %450:sreg_64 = V_CMP_GE_I32_e64 %98, %41, implicit $exec
+ %102:sreg_64 = SI_IF_BREAK killed %450, %63, implicit-def dead $scc
+ SI_LOOP %102, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.22
+
+ bb.22.Flow88:
+ successors: %bb.12(0x80000000)
+
+ %103:vgpr_32 = PHI %94, %bb.21
+ %104:vgpr_32 = PHI %95, %bb.21
+ %105:vgpr_32 = PHI %96, %bb.21
+ %106:vgpr_32 = PHI %97, %bb.21
+ SI_END_CF %102, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.12
+
+ bb.23.Flow93:
+ successors: %bb.51(0x40000000), %bb.52(0x40000000)
+
+ %109:vgpr_32 = PHI %15, %bb.4, %165, %bb.45
+ %110:vreg_64_align2 = PHI %16, %bb.4, %166, %bb.45
+ %111:sreg_64 = PHI %17, %bb.4, %167, %bb.45
+ %627:sreg_64 = COPY %111
+ %112:sreg_64 = SI_IF %627, %bb.52, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.51
+
+ bb.24.._crit_edge.i:
+ successors: %bb.25(0x50000000), %bb.33(0x30000000)
+
+ %451:sreg_32 = S_MOV_B32 1
+ S_CMP_LT_I32 %39, killed %451, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.33, implicit $scc
+ S_BRANCH %bb.25
+
+ bb.25 (%ir-block.266):
+ successors: %bb.26(0x40000000), %bb.34(0x40000000)
+
+ %453:sreg_64 = V_CMP_LT_I32_e64 %58, %34, implicit $exec
+ %113:sreg_64_xexec = COPY %453
+ %452:sgpr_32 = S_MOV_B32 0
+ %658:vgpr_32 = COPY %452, implicit $exec
+ %659:vgpr_32 = COPY %452, implicit $exec
+ %660:vgpr_32 = COPY %452, implicit $exec
+ %661:vgpr_32 = COPY %452, implicit $exec
+ %114:sreg_64 = SI_IF %453, %bb.34, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.26
+
+ bb.26 (%ir-block.271):
+ successors: %bb.27(0x40000000), %bb.32(0x40000000)
+
+ %455:vgpr_32 = nsw V_MUL_LO_U32_e64 %58, %295, implicit $exec
+ %456:vgpr_32 = V_ASHRREV_I32_e64 31, %455, implicit $exec
+ %458:vreg_64_align2 = REG_SEQUENCE %455, %subreg.sub0, %456, %subreg.sub1
+ %459:sreg_32 = S_MOV_B32 2
+ %460:vreg_64_align2 = V_LSHLREV_B64_e64 %459, killed %458, implicit $exec
+ %846:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %847:vgpr_32 = COPY %460.sub0
+ %848:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %849:vgpr_32 = COPY %460.sub1
+ %842:vgpr_32, %844:sreg_64_xexec = V_ADD_CO_U32_e64 %846, %847, 0, implicit $exec
+ %850:vgpr_32 = COPY %848
+ %843:vgpr_32, dead %845:sreg_64_xexec = V_ADDC_U32_e64 %850, %849, killed %844, 0, implicit $exec
+ %461:vreg_64_align2 = REG_SEQUENCE %842, %subreg.sub0, %843, %subreg.sub1
+ %115:vgpr_32 = GLOBAL_LOAD_DWORD killed %461, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.274, !tbaa !13, addrspace 1)
+ %462:sreg_32 = S_MOV_B32 1
+ %116:vgpr_32 = V_OR_B32_e64 %58, killed %462, implicit $exec
+ %463:sreg_64 = V_CMP_LT_I32_e64 %116, %34, implicit $exec
+ %454:sgpr_32 = S_MOV_B32 0
+ %655:vgpr_32 = COPY %454, implicit $exec
+ %656:vgpr_32 = COPY %454, implicit $exec
+ %657:vgpr_32 = COPY %454, implicit $exec
+ %117:sreg_64 = SI_IF killed %463, %bb.32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.27
+
+ bb.27 (%ir-block.281):
+ successors: %bb.28(0x40000000), %bb.31(0x40000000)
+
+ %465:vgpr_32 = nsw V_MUL_LO_U32_e64 %116, %295, implicit $exec
+ %466:vgpr_32 = V_ASHRREV_I32_e64 31, %465, implicit $exec
+ %468:vreg_64_align2 = REG_SEQUENCE %465, %subreg.sub0, %466, %subreg.sub1
+ %470:vreg_64_align2 = V_LSHLREV_B64_e64 %459, killed %468, implicit $exec
+ %855:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %856:vgpr_32 = COPY %470.sub0
+ %857:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %858:vgpr_32 = COPY %470.sub1
+ %851:vgpr_32, %853:sreg_64_xexec = V_ADD_CO_U32_e64 %855, %856, 0, implicit $exec
+ %859:vgpr_32 = COPY %857
+ %852:vgpr_32, dead %854:sreg_64_xexec = V_ADDC_U32_e64 %859, %858, killed %853, 0, implicit $exec
+ %471:vreg_64_align2 = REG_SEQUENCE %851, %subreg.sub0, %852, %subreg.sub1
+ %118:vgpr_32 = GLOBAL_LOAD_DWORD killed %471, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.284, !tbaa !13, addrspace 1)
+ %119:vgpr_32 = V_OR_B32_e64 %58, %459, implicit $exec
+ %472:sreg_64 = V_CMP_LT_I32_e64 %119, %34, implicit $exec
+ %464:sgpr_32 = S_MOV_B32 0
+ %653:vgpr_32 = COPY %464, implicit $exec
+ %654:vgpr_32 = COPY %464, implicit $exec
+ %120:sreg_64 = SI_IF killed %472, %bb.31, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.28
+
+ bb.28 (%ir-block.291):
+ successors: %bb.29(0x40000000), %bb.30(0x40000000)
+
+ %474:vgpr_32 = nsw V_MUL_LO_U32_e64 %119, %295, implicit $exec
+ %475:vgpr_32 = V_ASHRREV_I32_e64 31, %474, implicit $exec
+ %477:vreg_64_align2 = REG_SEQUENCE %474, %subreg.sub0, %475, %subreg.sub1
+ %478:sreg_32 = S_MOV_B32 2
+ %479:vreg_64_align2 = V_LSHLREV_B64_e64 %478, killed %477, implicit $exec
+ %864:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %865:vgpr_32 = COPY %479.sub0
+ %866:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %867:vgpr_32 = COPY %479.sub1
+ %860:vgpr_32, %862:sreg_64_xexec = V_ADD_CO_U32_e64 %864, %865, 0, implicit $exec
+ %868:vgpr_32 = COPY %866
+ %861:vgpr_32, dead %863:sreg_64_xexec = V_ADDC_U32_e64 %868, %867, killed %862, 0, implicit $exec
+ %480:vreg_64_align2 = REG_SEQUENCE %860, %subreg.sub0, %861, %subreg.sub1
+ %121:vgpr_32 = GLOBAL_LOAD_DWORD killed %480, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.294, !tbaa !13, addrspace 1)
+ %481:sreg_32 = S_MOV_B32 3
+ %122:vgpr_32 = V_OR_B32_e64 %58, killed %481, implicit $exec
+ %482:sreg_64 = V_CMP_LT_I32_e64 %122, %34, implicit $exec
+ %652:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ %123:sreg_64 = SI_IF killed %482, %bb.30, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.29
+
+ bb.29 (%ir-block.301):
+ successors: %bb.30(0x80000000)
+
+ %483:vgpr_32 = nsw V_MUL_LO_U32_e64 %122, %295, implicit $exec
+ %484:vgpr_32 = V_ASHRREV_I32_e64 31, %483, implicit $exec
+ %486:vreg_64_align2 = REG_SEQUENCE %483, %subreg.sub0, %484, %subreg.sub1
+ %488:vreg_64_align2 = V_LSHLREV_B64_e64 %478, killed %486, implicit $exec
+ %873:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub0
+ %874:vgpr_32 = COPY %488.sub0
+ %875:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %33.sub1
+ %876:vgpr_32 = COPY %488.sub1
+ %869:vgpr_32, %871:sreg_64_xexec = V_ADD_CO_U32_e64 %873, %874, 0, implicit $exec
+ %877:vgpr_32 = COPY %875
+ %870:vgpr_32, dead %872:sreg_64_xexec = V_ADDC_U32_e64 %877, %876, killed %871, 0, implicit $exec
+ %489:vreg_64_align2 = REG_SEQUENCE %869, %subreg.sub0, %870, %subreg.sub1
+ %124:vgpr_32 = GLOBAL_LOAD_DWORD killed %489, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.304, !tbaa !13, addrspace 1)
+
+ bb.30.Flow81:
+ successors: %bb.31(0x80000000)
+
+ %125:vgpr_32 = PHI %652, %bb.28, %124, %bb.29
+ SI_END_CF %123, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.31.Flow82:
+ successors: %bb.32(0x80000000)
+
+ %126:vgpr_32 = PHI %653, %bb.27, %121, %bb.30
+ %127:vgpr_32 = PHI %654, %bb.27, %125, %bb.30
+ SI_END_CF %120, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.32.Flow83:
+ successors: %bb.34(0x80000000)
+
+ %128:vgpr_32 = PHI %655, %bb.26, %118, %bb.31
+ %129:vgpr_32 = PHI %656, %bb.26, %126, %bb.31
+ %130:vgpr_32 = PHI %657, %bb.26, %127, %bb.31
+ SI_END_CF %117, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.34
+
+ bb.33.Flow84:
+ successors: %bb.43(0x80000000)
+
+ %131:vgpr_32 = PHI %59, %bb.24, %157, %bb.42
+ %132:vgpr_32 = PHI %60, %bb.24, %158, %bb.42
+ %133:vgpr_32 = PHI %61, %bb.24, %159, %bb.42
+ %134:vgpr_32 = PHI %62, %bb.24, %160, %bb.42
+ S_BRANCH %bb.43
+
+ bb.34 (%ir-block.316):
+ successors: %bb.35(0x40000000), %bb.42(0x40000000)
+
+ %135:vgpr_32 = PHI %658, %bb.25, %130, %bb.32
+ %136:vgpr_32 = PHI %659, %bb.25, %129, %bb.32
+ %137:vgpr_32 = PHI %660, %bb.25, %128, %bb.32
+ %138:vgpr_32 = PHI %661, %bb.25, %115, %bb.32
+ SI_END_CF %114, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %490:sreg_64 = V_CMP_LT_I32_e64 %38, %7, implicit $exec
+ %139:sreg_64 = SI_IF killed %490, %bb.42, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.35
+
+ bb.35 (%ir-block.321):
+ successors: %bb.36(0x40000000), %bb.41(0x40000000)
+
+ %491:vgpr_32 = nsw V_MUL_LO_U32_e64 %58, %292, implicit $exec
+ %492:sreg_32 = S_MOV_B32 0
+ %495:vgpr_32 = COPY %492
+ %493:vgpr_32 = V_CNDMASK_B32_e64 0, %495, 0, killed %491, %113, implicit $exec
+ %496:vgpr_32 = nsw V_ADD_U32_e64 killed %493, %38, 0, implicit $exec
+ %497:vgpr_32 = V_ASHRREV_I32_e64 31, %496, implicit $exec
+ %499:vreg_64_align2 = REG_SEQUENCE %496, %subreg.sub0, %497, %subreg.sub1
+ %500:sreg_32 = S_MOV_B32 2
+ %501:vreg_64_align2 = V_LSHLREV_B64_e64 %500, killed %499, implicit $exec
+ %882:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %883:vgpr_32 = COPY %501.sub0
+ %884:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %885:vgpr_32 = COPY %501.sub1
+ %878:vgpr_32, %880:sreg_64_xexec = V_ADD_CO_U32_e64 %882, %883, 0, implicit $exec
+ %886:vgpr_32 = COPY %884
+ %879:vgpr_32, dead %881:sreg_64_xexec = V_ADDC_U32_e64 %886, %885, killed %880, 0, implicit $exec
+ %140:vreg_64_align2 = REG_SEQUENCE %878, %subreg.sub0, %879, %subreg.sub1
+ %502:vgpr_32 = GLOBAL_LOAD_DWORD %140, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.326, !tbaa !13, addrspace 1)
+ %503:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %138, 0, killed %502, 0, %59, 0, 0, implicit $mode, implicit $exec
+ %504:sreg_32 = S_MOV_B32 1
+ %505:vgpr_32 = V_OR_B32_e64 %58, killed %504, implicit $exec
+ %506:vgpr_32 = nsw V_MUL_LO_U32_e64 %505, %292, implicit $exec
+ %507:sreg_64_xexec = V_CMP_LT_I32_e64 %505, %34, implicit $exec
+ %509:vgpr_32 = COPY %492
+ %508:vgpr_32 = V_CNDMASK_B32_e64 0, %509, 0, killed %506, killed %507, implicit $exec
+ %510:vgpr_32 = nsw V_ADD_U32_e64 killed %508, %38, 0, implicit $exec
+ %511:vgpr_32 = V_ASHRREV_I32_e64 31, %510, implicit $exec
+ %513:vreg_64_align2 = REG_SEQUENCE %510, %subreg.sub0, %511, %subreg.sub1
+ %514:vreg_64_align2 = V_LSHLREV_B64_e64 %500, killed %513, implicit $exec
+ %891:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %892:vgpr_32 = COPY %514.sub0
+ %893:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %894:vgpr_32 = COPY %514.sub1
+ %887:vgpr_32, %889:sreg_64_xexec = V_ADD_CO_U32_e64 %891, %892, 0, implicit $exec
+ %895:vgpr_32 = COPY %893
+ %888:vgpr_32, dead %890:sreg_64_xexec = V_ADDC_U32_e64 %895, %894, killed %889, 0, implicit $exec
+ %141:vreg_64_align2 = REG_SEQUENCE %887, %subreg.sub0, %888, %subreg.sub1
+ %515:vgpr_32 = GLOBAL_LOAD_DWORD %141, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.336, !tbaa !13, addrspace 1)
+ %516:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %137, 0, killed %515, 0, %503, 0, 0, implicit $mode, implicit $exec
+ %517:vgpr_32 = V_OR_B32_e64 %58, %500, implicit $exec
+ %518:vgpr_32 = nsw V_MUL_LO_U32_e64 %517, %292, implicit $exec
+ %519:sreg_64_xexec = V_CMP_LT_I32_e64 %517, %34, implicit $exec
+ %521:vgpr_32 = COPY %492
+ %520:vgpr_32 = V_CNDMASK_B32_e64 0, %521, 0, killed %518, killed %519, implicit $exec
+ %522:vgpr_32 = nsw V_ADD_U32_e64 killed %520, %38, 0, implicit $exec
+ %523:vgpr_32 = V_ASHRREV_I32_e64 31, %522, implicit $exec
+ %525:vreg_64_align2 = REG_SEQUENCE %522, %subreg.sub0, %523, %subreg.sub1
+ %526:vreg_64_align2 = V_LSHLREV_B64_e64 %500, killed %525, implicit $exec
+ %900:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %901:vgpr_32 = COPY %526.sub0
+ %902:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %903:vgpr_32 = COPY %526.sub1
+ %896:vgpr_32, %898:sreg_64_xexec = V_ADD_CO_U32_e64 %900, %901, 0, implicit $exec
+ %904:vgpr_32 = COPY %902
+ %897:vgpr_32, dead %899:sreg_64_xexec = V_ADDC_U32_e64 %904, %903, killed %898, 0, implicit $exec
+ %142:vreg_64_align2 = REG_SEQUENCE %896, %subreg.sub0, %897, %subreg.sub1
+ %527:vgpr_32 = GLOBAL_LOAD_DWORD %142, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.346, !tbaa !13, addrspace 1)
+ %528:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %136, 0, killed %527, 0, %516, 0, 0, implicit $mode, implicit $exec
+ %529:sreg_32 = S_MOV_B32 3
+ %530:vgpr_32 = V_OR_B32_e64 %58, killed %529, implicit $exec
+ %531:vgpr_32 = nsw V_MUL_LO_U32_e64 %530, %292, implicit $exec
+ %532:sreg_64_xexec = V_CMP_LT_I32_e64 %530, %34, implicit $exec
+ %534:vgpr_32 = COPY %492
+ %533:vgpr_32 = V_CNDMASK_B32_e64 0, %534, 0, killed %531, killed %532, implicit $exec
+ %535:vgpr_32 = nsw V_ADD_U32_e64 killed %533, %38, 0, implicit $exec
+ %536:vgpr_32 = V_ASHRREV_I32_e64 31, %535, implicit $exec
+ %538:vreg_64_align2 = REG_SEQUENCE %535, %subreg.sub0, %536, %subreg.sub1
+ %539:vreg_64_align2 = V_LSHLREV_B64_e64 %500, killed %538, implicit $exec
+ %909:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub0
+ %910:vgpr_32 = COPY %539.sub0
+ %911:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %31.sub1
+ %912:vgpr_32 = COPY %539.sub1
+ %905:vgpr_32, %907:sreg_64_xexec = V_ADD_CO_U32_e64 %909, %910, 0, implicit $exec
+ %913:vgpr_32 = COPY %911
+ %906:vgpr_32, dead %908:sreg_64_xexec = V_ADDC_U32_e64 %913, %912, killed %907, 0, implicit $exec
+ %143:vreg_64_align2 = REG_SEQUENCE %905, %subreg.sub0, %906, %subreg.sub1
+ %540:vgpr_32 = GLOBAL_LOAD_DWORD %143, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.356, !tbaa !13, addrspace 1)
+ %144:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %135, 0, killed %540, 0, %528, 0, 0, implicit $mode, implicit $exec
+ %541:sreg_32 = S_MOV_B32 64
+ %542:vgpr_32 = nsw V_ADD_U32_e64 %38, killed %541, 0, implicit $exec
+ %543:sreg_64 = V_CMP_LT_I32_e64 killed %542, %7, implicit $exec
+ %145:sreg_64 = SI_IF killed %543, %bb.41, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.36
+
+ bb.36 (%ir-block.365):
+ successors: %bb.37(0x40000000), %bb.40(0x40000000)
+
+ %544:vgpr_32 = GLOBAL_LOAD_DWORD %140, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.366, !tbaa !13, addrspace 1)
+ %545:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %138, 0, killed %544, 0, %60, 0, 0, implicit $mode, implicit $exec
+ %546:vgpr_32 = GLOBAL_LOAD_DWORD %141, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.370, !tbaa !13, addrspace 1)
+ %547:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %137, 0, killed %546, 0, %545, 0, 0, implicit $mode, implicit $exec
+ %548:vgpr_32 = GLOBAL_LOAD_DWORD %142, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.374, !tbaa !13, addrspace 1)
+ %549:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %136, 0, killed %548, 0, %547, 0, 0, implicit $mode, implicit $exec
+ %550:vgpr_32 = GLOBAL_LOAD_DWORD %143, 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.378, !tbaa !13, addrspace 1)
+ %146:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %135, 0, killed %550, 0, %549, 0, 0, implicit $mode, implicit $exec
+ %551:sreg_32 = S_MOV_B32 128
+ %552:vgpr_32 = nsw V_ADD_U32_e64 %38, killed %551, 0, implicit $exec
+ %553:sreg_64 = V_CMP_LT_I32_e64 killed %552, %7, implicit $exec
+ %147:sreg_64 = SI_IF killed %553, %bb.40, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.37
+
+ bb.37 (%ir-block.387):
+ successors: %bb.38(0x40000000), %bb.39(0x40000000)
+
+ %554:vgpr_32 = GLOBAL_LOAD_DWORD %140, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.388, !tbaa !13, addrspace 1)
+ %555:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %138, 0, killed %554, 0, %61, 0, 0, implicit $mode, implicit $exec
+ %556:vgpr_32 = GLOBAL_LOAD_DWORD %141, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.392, !tbaa !13, addrspace 1)
+ %557:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %137, 0, killed %556, 0, %555, 0, 0, implicit $mode, implicit $exec
+ %558:vgpr_32 = GLOBAL_LOAD_DWORD %142, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.396, !tbaa !13, addrspace 1)
+ %559:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %136, 0, killed %558, 0, %557, 0, 0, implicit $mode, implicit $exec
+ %560:vgpr_32 = GLOBAL_LOAD_DWORD %143, 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.400, !tbaa !13, addrspace 1)
+ %148:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %135, 0, killed %560, 0, %559, 0, 0, implicit $mode, implicit $exec
+ %561:sreg_32 = S_MOV_B32 192
+ %562:vgpr_32 = nsw V_ADD_U32_e64 %38, killed %561, 0, implicit $exec
+ %563:sreg_64 = V_CMP_LT_I32_e64 killed %562, %7, implicit $exec
+ %149:sreg_64 = SI_IF killed %563, %bb.39, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.38
+
+ bb.38 (%ir-block.409):
+ successors: %bb.39(0x80000000)
+
+ %564:vgpr_32 = GLOBAL_LOAD_DWORD %140, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.410, !tbaa !13, addrspace 1)
+ %565:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %138, 0, killed %564, 0, %62, 0, 0, implicit $mode, implicit $exec
+ %566:vgpr_32 = GLOBAL_LOAD_DWORD %141, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.414, !tbaa !13, addrspace 1)
+ %567:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %137, 0, killed %566, 0, %565, 0, 0, implicit $mode, implicit $exec
+ %568:vgpr_32 = GLOBAL_LOAD_DWORD %142, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.418, !tbaa !13, addrspace 1)
+ %569:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %136, 0, killed %568, 0, %567, 0, 0, implicit $mode, implicit $exec
+ %570:vgpr_32 = GLOBAL_LOAD_DWORD %143, 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.422, !tbaa !13, addrspace 1)
+ %150:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, %135, 0, killed %570, 0, %569, 0, 0, implicit $mode, implicit $exec
+
+ bb.39.Flow77:
+ successors: %bb.40(0x80000000)
+
+ %151:vgpr_32 = PHI %62, %bb.37, %150, %bb.38
+ SI_END_CF %149, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.40.Flow78:
+ successors: %bb.41(0x80000000)
+
+ %152:vgpr_32 = PHI %61, %bb.36, %148, %bb.39
+ %153:vgpr_32 = PHI %62, %bb.36, %151, %bb.39
+ SI_END_CF %147, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.41.Flow79:
+ successors: %bb.42(0x80000000)
+
+ %154:vgpr_32 = PHI %60, %bb.35, %146, %bb.40
+ %155:vgpr_32 = PHI %61, %bb.35, %152, %bb.40
+ %156:vgpr_32 = PHI %62, %bb.35, %153, %bb.40
+ SI_END_CF %145, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.42.Flow80:
+ successors: %bb.33(0x80000000)
+
+ %157:vgpr_32 = PHI %59, %bb.34, %144, %bb.41
+ %158:vgpr_32 = PHI %60, %bb.34, %154, %bb.41
+ %159:vgpr_32 = PHI %61, %bb.34, %155, %bb.41
+ %160:vgpr_32 = PHI %62, %bb.34, %156, %bb.41
+ SI_END_CF %139, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.33
+
+ bb.43 (%ir-block.436):
+ successors: %bb.44(0x40000000), %bb.45(0x40000000)
+
+ %573:sreg_32 = S_MOV_B32 8
+ %574:vgpr_32 = nuw nsw V_LSHLREV_B32_e64 killed %573, %12, implicit $exec
+ %575:sreg_32 = S_MOV_B32 2
+ %576:vgpr_32 = V_ADD_LSHL_U32_e64 killed %574, %11, %575, implicit $exec
+ DS_WRITE_B32_gfx9 %576, %131, 0, 0, implicit $exec :: (store (s32) into %ir.439, !tbaa !13, addrspace 3)
+ DS_WRITE_B32_gfx9 %576, %132, 256, 0, implicit $exec :: (store (s32) into %ir.440, !tbaa !13, addrspace 3)
+ DS_WRITE_B32_gfx9 %576, %133, 512, 0, implicit $exec :: (store (s32) into %ir.441, !tbaa !13, addrspace 3)
+ DS_WRITE_B32_gfx9 %576, %134, 768, 0, implicit $exec :: (store (s32) into %ir.442, !tbaa !13, addrspace 3)
+ ATOMIC_FENCE 5, 3
+ S_BARRIER
+ ATOMIC_FENCE 4, 3
+ %577:sreg_32 = S_MOV_B32 256
+ %578:sreg_64 = V_CMP_LT_U32_e64 %13, killed %577, implicit $exec
+ %572:sreg_64 = IMPLICIT_DEF
+ %571:sgpr_32 = IMPLICIT_DEF
+ %662:vgpr_32 = COPY %571
+ %663:vreg_64_align2 = COPY %572
+ %161:sreg_64 = SI_IF killed %578, %bb.45, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.44
+
+ bb.44..preheader.i:
+ successors: %bb.46(0x40000000), %bb.50(0x40000000)
+
+ %582:vgpr_32 = V_LSHLREV_B32_e64 %575, %13, implicit $exec
+ %583:vgpr_32 = DS_READ_B32_gfx9 %582, 0, 0, implicit $exec :: (load (s32) from %ir.447, !tbaa !13, addrspace 3)
+ %584:vgpr_32 = DS_READ_B32_gfx9 %582, 1024, 0, implicit $exec :: (load (s32) from %ir.448, !tbaa !13, addrspace 3)
+ %585:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %583, 0, killed %584, 0, 0, implicit $mode, implicit $exec
+ %586:vgpr_32 = DS_READ_B32_gfx9 %582, 2048, 0, implicit $exec :: (load (s32) from %ir.451, !tbaa !13, addrspace 3)
+ %587:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %586, 0, killed %585, 0, 0, implicit $mode, implicit $exec
+ %588:vgpr_32 = DS_READ_B32_gfx9 %582, 3072, 0, implicit $exec :: (load (s32) from %ir.454, !tbaa !13, addrspace 3)
+ %589:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %588, 0, killed %587, 0, 0, implicit $mode, implicit $exec
+ %590:vgpr_32 = DS_READ_B32_gfx9 %582, 4096, 0, implicit $exec :: (load (s32) from %ir.457, !tbaa !13, addrspace 3)
+ %591:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %590, 0, killed %589, 0, 0, implicit $mode, implicit $exec
+ %592:vgpr_32 = DS_READ_B32_gfx9 %582, 5120, 0, implicit $exec :: (load (s32) from %ir.460, !tbaa !13, addrspace 3)
+ %593:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %592, 0, killed %591, 0, 0, implicit $mode, implicit $exec
+ %594:vgpr_32 = DS_READ_B32_gfx9 %582, 6144, 0, implicit $exec :: (load (s32) from %ir.463, !tbaa !13, addrspace 3)
+ %595:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %594, 0, killed %593, 0, 0, implicit $mode, implicit $exec
+ %596:vgpr_32 = DS_READ_B32_gfx9 %582, 7168, 0, implicit $exec :: (load (s32) from %ir.466, !tbaa !13, addrspace 3)
+ %597:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %596, 0, killed %595, 0, 0, implicit $mode, implicit $exec
+ %598:vgpr_32 = DS_READ_B32_gfx9 %582, 8192, 0, implicit $exec :: (load (s32) from %ir.469, !tbaa !13, addrspace 3)
+ %599:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %598, 0, killed %597, 0, 0, implicit $mode, implicit $exec
+ %600:vgpr_32 = DS_READ_B32_gfx9 %582, 9216, 0, implicit $exec :: (load (s32) from %ir.472, !tbaa !13, addrspace 3)
+ %601:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %600, 0, killed %599, 0, 0, implicit $mode, implicit $exec
+ %602:vgpr_32 = DS_READ_B32_gfx9 %582, 10240, 0, implicit $exec :: (load (s32) from %ir.475, !tbaa !13, addrspace 3)
+ %603:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %602, 0, killed %601, 0, 0, implicit $mode, implicit $exec
+ %604:vgpr_32 = DS_READ_B32_gfx9 %582, 11264, 0, implicit $exec :: (load (s32) from %ir.478, !tbaa !13, addrspace 3)
+ %605:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %604, 0, killed %603, 0, 0, implicit $mode, implicit $exec
+ %606:vgpr_32 = DS_READ_B32_gfx9 %582, 12288, 0, implicit $exec :: (load (s32) from %ir.481, !tbaa !13, addrspace 3)
+ %607:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %606, 0, killed %605, 0, 0, implicit $mode, implicit $exec
+ %608:vgpr_32 = DS_READ_B32_gfx9 %582, 13312, 0, implicit $exec :: (load (s32) from %ir.484, !tbaa !13, addrspace 3)
+ %609:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %608, 0, killed %607, 0, 0, implicit $mode, implicit $exec
+ %610:vgpr_32 = DS_READ_B32_gfx9 %582, 14336, 0, implicit $exec :: (load (s32) from %ir.487, !tbaa !13, addrspace 3)
+ %611:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %610, 0, killed %609, 0, 0, implicit $mode, implicit $exec
+ %612:vgpr_32 = DS_READ_B32_gfx9 %582, 15360, 0, implicit $exec :: (load (s32) from %ir.490, !tbaa !13, addrspace 3)
+ %162:vgpr_32 = contract nofpexcept V_ADD_F32_e64 0, killed %612, 0, killed %611, 0, 0, implicit $mode, implicit $exec
+ DS_WRITE_B32_gfx9 %582, %162, 0, 0, implicit $exec :: (store (s32) into %ir.447, !tbaa !13, addrspace 3)
+ %163:vgpr_32 = V_ADD_U32_e64 %13, %37, 0, implicit $exec
+ %613:sreg_64 = V_CMP_LT_I32_e64 %163, %7, implicit $exec
+ %580:sreg_64 = IMPLICIT_DEF
+ %579:sgpr_32 = IMPLICIT_DEF
+ %665:vgpr_32 = COPY %579
+ %666:vreg_64_align2 = COPY %580
+ %164:sreg_64 = SI_IF killed %613, %bb.50, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.46
+
+ bb.45.Flow94:
+ successors: %bb.23(0x80000000)
+
+ %167:sreg_64 = PHI %17, %bb.43, %674, %bb.50
+ %165:vgpr_32 = PHI %662, %bb.43, %173, %bb.50
+ %166:vreg_64_align2 = PHI %663, %bb.43, %174, %bb.50
+ SI_END_CF %161, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.23
+
+ bb.46 (%ir-block.501):
+ successors: %bb.47(0x50000000), %bb.49(0x30000000)
+
+ %614:sgpr_32 = S_MOV_B32 0
+ %616:vgpr_32 = COPY killed %614
+ %615:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, %220, 0, %616, 0, implicit $mode, implicit $exec
+ %168:vgpr_32 = contract nofpexcept V_MUL_F32_e64 0, %162, 0, %5, 0, 0, implicit $mode, implicit $exec
+ %617:vgpr_32 = nsw V_MUL_LO_U32_e64 %163, %234, implicit $exec
+ %618:vgpr_32 = V_ASHRREV_I32_e64 31, %617, implicit $exec
+ %620:vreg_64_align2 = REG_SEQUENCE %617, %subreg.sub0, %618, %subreg.sub1
+ %169:vreg_64_align2 = COPY %620
+ %621:sreg_64 = S_AND_B64 $exec, killed %615, implicit-def dead $scc
+ $vcc = COPY %621
+ S_CBRANCH_VCCNZ %bb.49, implicit $vcc
+ S_BRANCH %bb.47
+
+ bb.47 (%ir-block.506):
+ successors: %bb.49(0x80000000)
+
+ %622:sreg_32 = S_MOV_B32 2
+ %623:vreg_64_align2 = V_LSHLREV_B64_e64 killed %622, %620, implicit $exec
+ %918:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %10.sub0
+ %919:vgpr_32 = COPY %623.sub0
+ %920:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %10.sub1
+ %921:vgpr_32 = COPY %623.sub1
+ %914:vgpr_32, %916:sreg_64_xexec = V_ADD_CO_U32_e64 %918, %919, 0, implicit $exec
+ %922:vgpr_32 = COPY %920
+ %915:vgpr_32, dead %917:sreg_64_xexec = V_ADDC_U32_e64 %922, %921, killed %916, 0, implicit $exec
+ %624:vreg_64_align2 = REG_SEQUENCE %914, %subreg.sub0, %915, %subreg.sub1
+ %625:vgpr_32 = GLOBAL_LOAD_DWORD killed %624, 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.507, !tbaa !13, addrspace 1)
+ %170:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, killed %625, 0, %220, 0, %168, 0, 0, implicit $mode, implicit $exec
+ S_BRANCH %bb.49
+
+ bb.48.Flow:
+ successors: %bb.9(0x80000000)
+
+ %171:vgpr_32 = PHI %664, %bb.7, %25, %bb.8
+ %672:sreg_64 = COPY $exec
+ S_BRANCH %bb.9
+
+ bb.49.Flow76:
+ successors: %bb.50(0x80000000)
+
+ %172:vgpr_32 = PHI %168, %bb.46, %170, %bb.47
+ %678:sreg_64 = S_OR_B64 %17, $exec, implicit-def $scc
+
+ bb.50.Flow95:
+ successors: %bb.45(0x80000000)
+
+ %175:sreg_64 = PHI %17, %bb.44, %678, %bb.49
+ %173:vgpr_32 = PHI %665, %bb.44, %172, %bb.49
+ %174:vreg_64_align2 = PHI %666, %bb.44, %169, %bb.49
+ SI_END_CF %164, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %675:sreg_64 = S_ANDN2_B64 %17, $exec, implicit-def $scc
+ %676:sreg_64 = S_AND_B64 %175, $exec, implicit-def $scc
+ %674:sreg_64 = S_OR_B64 %675, %676, implicit-def $scc
+ S_BRANCH %bb.45
+
+ bb.51..sink.split.i:
+ successors: %bb.52(0x80000000)
+
+ %628:sreg_32 = S_MOV_B32 2
+ %629:vreg_64_align2 = V_LSHLREV_B64_e64 killed %628, %110, implicit $exec
+ %927:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %10.sub0
+ %928:vgpr_32 = COPY %629.sub0
+ %929:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY %10.sub1
+ %930:vgpr_32 = COPY %629.sub1
+ %923:vgpr_32, %925:sreg_64_xexec = V_ADD_CO_U32_e64 %927, %928, 0, implicit $exec
+ %931:vgpr_32 = COPY %929
+ %924:vgpr_32, dead %926:sreg_64_xexec = V_ADDC_U32_e64 %931, %930, killed %925, 0, implicit $exec
+ %630:vreg_64_align2 = REG_SEQUENCE %923, %subreg.sub0, %924, %subreg.sub1
+ GLOBAL_STORE_DWORD killed %630, %109, 0, 0, implicit $exec :: (store (s32) into %ir.516, !tbaa !13, addrspace 1)
+
+ bb.52.Flow96:
+ successors: %bb.53(0x80000000)
+
+ SI_END_CF %112, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.53.Flow97:
+ successors: %bb.54(0x80000000)
+
+
+ bb.54._Z25rocblas_gemvn_kernel_calcILi64ELi16EiffLi0EEviiT3_PKT2_T1_S3_iS0_PS1_i.exit:
+ S_ENDPGM 0
+
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
>From 0a642094f5bec27297a46eee0f3ba85819a1a047 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Thu, 7 Sep 2023 15:46:45 +0200
Subject: [PATCH 2/2] MachineSink/AMDGPU: Allow sinking past SI_END_CF in a
simple case
SWEDEV-414443. Fixes a performance regression introduced by D155343 by
allowing sinking past SI_END_CF in the simple case where there are no loops.
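
For illustration, this is the shape of the pattern the new hook accepts
(a minimal sketch mirroring the comment added in SIInstrInfo.cpp; register
classes and most operands are elided):

  bb.0:
    %v = V_ADD_U32_e64 %a, %b, implicit $exec   ; %v only used in bb.2
    %saved:sreg_64 = SI_IF %cond, %bb.2, implicit-def dead $exec, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    ; divergent code, no loops
    ...

  bb.2:
    SI_END_CF %saved, implicit-def dead $exec
    ; %v may be sunk here: SI_END_CF only restores $exec to its value
    ; from before the SI_IF, so %v reads the same exec mask either way.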
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 9 +++
llvm/lib/CodeGen/MachineSink.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 33 +++++++++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 +
.../AMDGPU/machine-sink-swdev414443.mir | 74 +++++++++----------
5 files changed, 86 insertions(+), 38 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 1c2ca867834647..310cb22d4862f2 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -140,6 +140,15 @@ class TargetInstrInfo : public MCInstrInfo {
return false;
}
+ /// Called when attempting to move \p MoveCandidate past \p ModifierInstr.
+ /// \p MoveCandidate uses \p Reg, but \p ModifierInstr redefines \p Reg.
+ /// Lets the target check whether \p Reg is redefined with the same value.
+ virtual bool
+ modifiesRegisterImplicitly(Register Reg, const MachineInstr *MoveCandidate,
+ const MachineInstr *ModifierInstr) const {
+ return true;
+ }
+
protected:
/// For instructions with opcodes for which the M_REMATERIALIZABLE flag is
/// set, this hook lets the target specify whether the instruction is actually
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index b4cbb93d758ef2..aa89dddb05d7e5 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -290,8 +290,10 @@ static bool blockPrologueInterferes(const MachineBasicBlock *BB,
if (MO.isUse()) {
if (Reg.isPhysical() && MRI && MRI->isConstantPhysReg(Reg))
continue;
- if (PI->modifiesRegister(Reg, TRI))
+ if (PI->modifiesRegister(Reg, TRI) &&
+ TII->modifiesRegisterImplicitly(Reg, &MI, &*PI)) {
return true;
+ }
} else {
if (PI->readsRegister(Reg, TRI))
return true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 38b5e0114903cd..c8a2728bb3445a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -171,6 +171,39 @@ bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
}
+bool SIInstrInfo::modifiesRegisterImplicitly(
+ Register Reg, const MachineInstr *MoveCandidate,
+ const MachineInstr *ModifierInstr) const {
+
+ if (ModifierInstr->getOpcode() == AMDGPU::SI_END_CF && Reg == AMDGPU::EXEC) {
+ const MachineRegisterInfo &MRI = MoveCandidate->getMF()->getRegInfo();
+
+ // Check whether this is the simple case of:
+ //
+ // %0 = MoveCandidate %1, %2, implicit $exec
+ // %EndCF:sreg_64 = SI_IF %cond, %bb.B
+ // S_BRANCH %bb.A
+ //
+ // bb.A
+ // ...
+ //
+ // bb.B
+ // SI_END_CF %EndCF, implicit-def dead $exec
+ // ... MoveCandidate should be moved here
+
+ // MoveCandidate is in the block that opened divergent control flow via
+ // SI_IF, the SI_IF is simple (no loops) since its only user is this
+ // SI_END_CF, and SI_END_CF restores the exec mask to its pre-SI_IF value.
+ Register EndCF = ModifierInstr->getOperand(0).getReg();
+ MachineInstr *SIIF = MRI.getVRegDef(EndCF);
+ if (SIIF->getOpcode() == AMDGPU::SI_IF && MRI.hasOneUse(EndCF) &&
+ SIIF->getParent() == MoveCandidate->getParent())
+ return false;
+ }
+
+ return true;
+}
+
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
int64_t &Offset0,
int64_t &Offset1) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e85917a4c0f329..16187b91d3734f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -222,6 +222,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isIgnorableUse(const MachineOperand &MO) const override;
+ bool
+ modifiesRegisterImplicitly(Register Reg, const MachineInstr *MoveCandidate,
+ const MachineInstr *ModifierInstr) const override;
+
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
int64_t &Offset1) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir
index 84fc2a619a5c70..cb1e496721282e 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-swdev414443.mir
@@ -2330,7 +2330,6 @@ body: |
; CHECK-NEXT: [[V_ADDC_U32_e64_10:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_11:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY141]], [[COPY140]], killed [[V_ADD_CO_U32_e64_11]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE39:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_10]], %subreg.sub0, [[V_ADDC_U32_e64_10]], %subreg.sub1
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD5:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.166, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD5]], 0, [[PHI20]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_4]], [[PHI18]], 0, implicit $exec
; CHECK-NEXT: [[V_ASHRREV_I32_e64_6:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_12]], implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE40:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_12]], %subreg.sub0, [[V_ASHRREV_I32_e64_6]], %subreg.sub1
@@ -2344,7 +2343,6 @@ body: |
; CHECK-NEXT: [[V_ADDC_U32_e64_12:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_13:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY146]], [[COPY145]], killed [[V_ADD_CO_U32_e64_13]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE41:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_12]], %subreg.sub0, [[V_ADDC_U32_e64_12]], %subreg.sub1
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD6:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.172, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD6]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MUL_LO_U32_e64_4]], [[PHI18]], 0, implicit $exec
; CHECK-NEXT: [[V_ASHRREV_I32_e64_7:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_13]], implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE42:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_13]], %subreg.sub0, [[V_ASHRREV_I32_e64_7]], %subreg.sub1
@@ -2358,7 +2356,6 @@ body: |
; CHECK-NEXT: [[V_ADDC_U32_e64_14:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_15:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY151]], [[COPY150]], killed [[V_ADD_CO_U32_e64_15]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE43:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_14]], %subreg.sub0, [[V_ADDC_U32_e64_14]], %subreg.sub1
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD7:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.178, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD7]], 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MUL_LO_U32_e64_5]], [[PHI18]], 0, implicit $exec
; CHECK-NEXT: [[V_ASHRREV_I32_e64_8:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[V_ADD_U32_e64_14]], implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE44:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_U32_e64_14]], %subreg.sub0, [[V_ASHRREV_I32_e64_8]], %subreg.sub1
@@ -2372,7 +2369,6 @@ body: |
; CHECK-NEXT: [[V_ADDC_U32_e64_16:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_17:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY156]], [[COPY155]], killed [[V_ADD_CO_U32_e64_17]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE45:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_16]], %subreg.sub0, [[V_ADDC_U32_e64_16]], %subreg.sub1
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD8:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.184, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD8]], 0, [[V_FMAC_F32_e64_2]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[SI_IF4:%[0-9]+]]:sreg_64 = SI_IF [[COPY119]], %bb.20, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.15
; CHECK-NEXT: {{ $}}
@@ -2380,13 +2376,9 @@ body: |
; CHECK-NEXT: successors: %bb.16(0x40000000), %bb.19(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD9:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.192, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_4:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD9]], 0, [[PHI21]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD10:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.196, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_5:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD10]], 0, [[V_FMAC_F32_e64_4]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD11:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.200, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_6:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD11]], 0, [[V_FMAC_F32_e64_5]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD12:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.204, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_7:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD12]], 0, [[V_FMAC_F32_e64_6]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[SI_IF5:%[0-9]+]]:sreg_64 = SI_IF [[COPY120]], %bb.19, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.16
; CHECK-NEXT: {{ $}}
@@ -2394,13 +2386,9 @@ body: |
; CHECK-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD13:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.212, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_8:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD13]], 0, [[PHI22]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD14:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.216, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_9:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD14]], 0, [[V_FMAC_F32_e64_8]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD15:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.220, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_10:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD15]], 0, [[V_FMAC_F32_e64_9]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD16:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.224, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_11:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD16]], 0, [[V_FMAC_F32_e64_10]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[SI_IF6:%[0-9]+]]:sreg_64 = SI_IF [[COPY121]], %bb.18, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.17
; CHECK-NEXT: {{ $}}
@@ -2408,34 +2396,46 @@ body: |
; CHECK-NEXT: successors: %bb.18(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD17:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE39]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.232, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_12:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD17]], 0, [[PHI23]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, killed [[GLOBAL_LOAD_DWORD17]], 0, [[PHI23]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD18:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE41]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.236, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_13:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD18]], 0, [[V_FMAC_F32_e64_12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, killed [[GLOBAL_LOAD_DWORD18]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE43]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.240, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_14:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD19]], 0, [[V_FMAC_F32_e64_13]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_2:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, killed [[GLOBAL_LOAD_DWORD19]], 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD20:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE45]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.244, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_15:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD20]], 0, [[V_FMAC_F32_e64_14]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_3:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, killed [[GLOBAL_LOAD_DWORD20]], 0, [[V_FMAC_F32_e64_2]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.18.Flow85:
; CHECK-NEXT: successors: %bb.19(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI24:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.16, [[V_FMAC_F32_e64_15]], %bb.17
+ ; CHECK-NEXT: [[PHI24:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.16, [[V_FMAC_F32_e64_3]], %bb.17
; CHECK-NEXT: SI_END_CF [[SI_IF6]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_4:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[GLOBAL_LOAD_DWORD13]], 0, [[PHI22]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_5:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, [[GLOBAL_LOAD_DWORD14]], 0, [[V_FMAC_F32_e64_4]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_6:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, [[GLOBAL_LOAD_DWORD15]], 0, [[V_FMAC_F32_e64_5]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_7:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, [[GLOBAL_LOAD_DWORD16]], 0, [[V_FMAC_F32_e64_6]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.19.Flow86:
; CHECK-NEXT: successors: %bb.20(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI25:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.15, [[V_FMAC_F32_e64_11]], %bb.18
+ ; CHECK-NEXT: [[PHI25:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.15, [[V_FMAC_F32_e64_7]], %bb.18
; CHECK-NEXT: [[PHI26:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.15, [[PHI24]], %bb.18
; CHECK-NEXT: SI_END_CF [[SI_IF5]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_8:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[GLOBAL_LOAD_DWORD9]], 0, [[PHI21]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_9:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, [[GLOBAL_LOAD_DWORD10]], 0, [[V_FMAC_F32_e64_8]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_10:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, [[GLOBAL_LOAD_DWORD11]], 0, [[V_FMAC_F32_e64_9]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_11:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, [[GLOBAL_LOAD_DWORD12]], 0, [[V_FMAC_F32_e64_10]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.20.Flow87:
; CHECK-NEXT: successors: %bb.21(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI27:%[0-9]+]]:vgpr_32 = PHI [[PHI21]], %bb.14, [[V_FMAC_F32_e64_7]], %bb.19
+ ; CHECK-NEXT: [[PHI27:%[0-9]+]]:vgpr_32 = PHI [[PHI21]], %bb.14, [[V_FMAC_F32_e64_11]], %bb.19
; CHECK-NEXT: [[PHI28:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.14, [[PHI25]], %bb.19
; CHECK-NEXT: [[PHI29:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.14, [[PHI26]], %bb.19
; CHECK-NEXT: SI_END_CF [[SI_IF4]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_12:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD1]], 0, [[GLOBAL_LOAD_DWORD5]], 0, [[PHI20]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_13:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD2]], 0, [[GLOBAL_LOAD_DWORD6]], 0, [[V_FMAC_F32_e64_12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_14:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD3]], 0, [[GLOBAL_LOAD_DWORD7]], 0, [[V_FMAC_F32_e64_13]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_15:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[GLOBAL_LOAD_DWORD4]], 0, [[GLOBAL_LOAD_DWORD8]], 0, [[V_FMAC_F32_e64_14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.21 (%ir-block.254):
; CHECK-NEXT: successors: %bb.22(0x04000000), %bb.13(0x7c000000)
@@ -2443,7 +2443,7 @@ body: |
; CHECK-NEXT: [[PHI30:%[0-9]+]]:vgpr_32 = PHI [[PHI23]], %bb.13, [[PHI29]], %bb.20
; CHECK-NEXT: [[PHI31:%[0-9]+]]:vgpr_32 = PHI [[PHI22]], %bb.13, [[PHI28]], %bb.20
; CHECK-NEXT: [[PHI32:%[0-9]+]]:vgpr_32 = PHI [[PHI21]], %bb.13, [[PHI27]], %bb.20
- ; CHECK-NEXT: [[PHI33:%[0-9]+]]:vgpr_32 = PHI [[PHI20]], %bb.13, [[V_FMAC_F32_e64_3]], %bb.20
+ ; CHECK-NEXT: [[PHI33:%[0-9]+]]:vgpr_32 = PHI [[PHI20]], %bb.13, [[V_FMAC_F32_e64_15]], %bb.20
; CHECK-NEXT: SI_END_CF [[SI_IF3]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = nuw nsw V_ADD_U32_e64 [[PHI19]], [[S_MOV_B32_24]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI18]], [[S_LSHL_B32_1]], 0, implicit $exec
@@ -2716,7 +2716,6 @@ body: |
; CHECK-NEXT: [[V_ADDC_U32_e64_34:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_35:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY215]], [[COPY214]], killed [[V_ADD_CO_U32_e64_35]], 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE62:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_34]], %subreg.sub0, [[V_ADDC_U32_e64_34]], %subreg.sub1
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD28:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 0, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.356, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_19:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD28]], 0, [[V_FMAC_F32_e64_18]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sreg_32 = S_MOV_B32 64
; CHECK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_42]], 0, implicit $exec
; CHECK-NEXT: [[V_CMP_LT_I32_e64_15:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_21]], [[COPY28]], implicit $exec
@@ -2727,13 +2726,12 @@ body: |
; CHECK-NEXT: successors: %bb.37(0x40000000), %bb.40(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD29:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.366, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_20:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD29]], 0, [[PHI12]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_19:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD29]], 0, [[PHI12]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD30:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.370, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_21:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD30]], 0, [[V_FMAC_F32_e64_20]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_20:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD30]], 0, [[V_FMAC_F32_e64_19]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD31:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.374, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_22:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD31]], 0, [[V_FMAC_F32_e64_21]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_21:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD31]], 0, [[V_FMAC_F32_e64_20]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD32:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 256, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.378, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_23:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD32]], 0, [[V_FMAC_F32_e64_22]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sreg_32 = S_MOV_B32 128
; CHECK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_43]], 0, implicit $exec
; CHECK-NEXT: [[V_CMP_LT_I32_e64_16:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_22]], [[COPY28]], implicit $exec
@@ -2744,13 +2742,12 @@ body: |
; CHECK-NEXT: successors: %bb.38(0x40000000), %bb.39(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD33:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.388, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_24:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD33]], 0, [[PHI13]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_22:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD33]], 0, [[PHI13]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD34:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.392, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_25:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD34]], 0, [[V_FMAC_F32_e64_24]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_23:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD34]], 0, [[V_FMAC_F32_e64_22]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD35:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.396, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_26:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD35]], 0, [[V_FMAC_F32_e64_25]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_24:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD35]], 0, [[V_FMAC_F32_e64_23]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD36:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 512, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.400, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_27:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD36]], 0, [[V_FMAC_F32_e64_26]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sreg_32 = S_MOV_B32 192
; CHECK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e64 [[V_ADD_U32_e64_]], killed [[S_MOV_B32_44]], 0, implicit $exec
; CHECK-NEXT: [[V_CMP_LT_I32_e64_17:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_ADD_U32_e64_23]], [[COPY28]], implicit $exec
@@ -2761,39 +2758,42 @@ body: |
; CHECK-NEXT: successors: %bb.39(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD37:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE56]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.410, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_28:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD37]], 0, [[PHI14]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_25:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI54]], 0, killed [[GLOBAL_LOAD_DWORD37]], 0, [[PHI14]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD38:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE58]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.414, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_29:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD38]], 0, [[V_FMAC_F32_e64_28]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_26:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI53]], 0, killed [[GLOBAL_LOAD_DWORD38]], 0, [[V_FMAC_F32_e64_25]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD39:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE60]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.418, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_30:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD39]], 0, [[V_FMAC_F32_e64_29]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_27:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI52]], 0, killed [[GLOBAL_LOAD_DWORD39]], 0, [[V_FMAC_F32_e64_26]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD40:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE62]], 768, 0, implicit $exec :: ("amdgpu-noclobber" load (s32) from %ir.422, !tbaa !13, addrspace 1)
- ; CHECK-NEXT: [[V_FMAC_F32_e64_31:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD40]], 0, [[V_FMAC_F32_e64_30]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_28:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, killed [[GLOBAL_LOAD_DWORD40]], 0, [[V_FMAC_F32_e64_27]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.39.Flow77:
; CHECK-NEXT: successors: %bb.40(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI55:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.37, [[V_FMAC_F32_e64_31]], %bb.38
+ ; CHECK-NEXT: [[PHI55:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.37, [[V_FMAC_F32_e64_28]], %bb.38
; CHECK-NEXT: SI_END_CF [[SI_IF15]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_29:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, [[GLOBAL_LOAD_DWORD36]], 0, [[V_FMAC_F32_e64_24]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.40.Flow78:
; CHECK-NEXT: successors: %bb.41(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI56:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.36, [[V_FMAC_F32_e64_27]], %bb.39
+ ; CHECK-NEXT: [[PHI56:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.36, [[V_FMAC_F32_e64_29]], %bb.39
; CHECK-NEXT: [[PHI57:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.36, [[PHI55]], %bb.39
; CHECK-NEXT: SI_END_CF [[SI_IF14]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_30:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, [[GLOBAL_LOAD_DWORD32]], 0, [[V_FMAC_F32_e64_21]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.41.Flow79:
; CHECK-NEXT: successors: %bb.42(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI58:%[0-9]+]]:vgpr_32 = PHI [[PHI12]], %bb.35, [[V_FMAC_F32_e64_23]], %bb.40
+ ; CHECK-NEXT: [[PHI58:%[0-9]+]]:vgpr_32 = PHI [[PHI12]], %bb.35, [[V_FMAC_F32_e64_30]], %bb.40
; CHECK-NEXT: [[PHI59:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.35, [[PHI56]], %bb.40
; CHECK-NEXT: [[PHI60:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.35, [[PHI57]], %bb.40
; CHECK-NEXT: SI_END_CF [[SI_IF13]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_FMAC_F32_e64_31:%[0-9]+]]:vgpr_32 = contract nofpexcept V_FMAC_F32_e64 0, [[PHI51]], 0, [[GLOBAL_LOAD_DWORD28]], 0, [[V_FMAC_F32_e64_18]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.42.Flow80:
; CHECK-NEXT: successors: %bb.33(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI61:%[0-9]+]]:vgpr_32 = PHI [[PHI11]], %bb.34, [[V_FMAC_F32_e64_19]], %bb.41
+ ; CHECK-NEXT: [[PHI61:%[0-9]+]]:vgpr_32 = PHI [[PHI11]], %bb.34, [[V_FMAC_F32_e64_31]], %bb.41
; CHECK-NEXT: [[PHI62:%[0-9]+]]:vgpr_32 = PHI [[PHI12]], %bb.34, [[PHI58]], %bb.41
; CHECK-NEXT: [[PHI63:%[0-9]+]]:vgpr_32 = PHI [[PHI13]], %bb.34, [[PHI59]], %bb.41
; CHECK-NEXT: [[PHI64:%[0-9]+]]:vgpr_32 = PHI [[PHI14]], %bb.34, [[PHI60]], %bb.41
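
For readers skimming the checks: the recurring pattern in the hunks above is that machine-sink moves the V_FMAC_F32_e64 chains out of the blocks where they were defined and into the Flow blocks (bb.18.Flow85, bb.39.Flow77, etc.) after the SI_END_CF, where their results feed the PHIs, and update_mir_test_checks.py then renumbers the [[V_FMAC_F32_e64_N]] value names throughout. Below is a minimal sketch of that transform on a hypothetical function (the name sink_sketch, the registers, and the block layout are invented for illustration and are not taken from this patch): %2 is defined in bb.0 but only used in bb.2, so running the same machine-sink pass should move it into bb.2 next to its use.

# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-sink -o - %s
---
name: sink_sketch
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2

    %0:vgpr_32 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    ; Defined here, but its only use is in bb.2, so machine-sink is
    ; expected to sink this VALU add into bb.2 (reads of $exec are
    ; ignorable for sinking on AMDGPU).
    %2:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec
    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
    S_BRANCH %bb.1

  bb.1:
    S_ENDPGM 0

  bb.2:
    ; Artificial use that anchors the sink destination.
    S_NOP 0, implicit %2
    S_ENDPGM 0
...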