[llvm] [AMDGCN] Allow unscheduling of bundled insns (PR #129769)
Julian Brown via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 09:12:25 PST 2025
https://github.com/jtb20 updated https://github.com/llvm/llvm-project/pull/129769
>From 7a89d999350efe4c3a2f4b9a7855cdf52b3909c5 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown at amd.com>
Date: Tue, 4 Mar 2025 10:34:11 -0600
Subject: [PATCH] [AMDGCN] Allow unscheduling of bundled insns
This is a patch arising from AMD's fuzzing project.
In the test case, the scheduling algorithm decides to undo an attempted
schedule, but is unprepared to handle bundled instructions at that
point -- and those can arise via the expansion of intrinsics earlier
in compilation. The fix is to use the splice method instead of
remove/insert, since that can handle bundles properly.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 3 +-
.../AMDGPU/sema-v-unsched-bundle-2.mir | 929 ++++++++++++++++++
.../CodeGen/AMDGPU/sema-v-unsched-bundle.ll | 24 +
3 files changed, 954 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle-2.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle.ll
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index c277223de13ac..5dcf523430fd2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1567,8 +1567,7 @@ void GCNSchedStage::revertScheduling() {
}
if (MI->getIterator() != DAG.RegionEnd) {
- DAG.BB->remove(MI);
- DAG.BB->insert(DAG.RegionEnd, MI);
+ DAG.BB->splice(DAG.RegionEnd, DAG.BB, MI);
if (!MI->isDebugInstr())
DAG.LIS->handleMove(*MI, true);
}
diff --git a/llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle-2.mir b/llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle-2.mir
new file mode 100644
index 0000000000000..5770d6c6e0092
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle-2.mir
@@ -0,0 +1,929 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -misched=gcn-max-occupancy -run-pass=machine-scheduler %s -o - | FileCheck %s
+
+--- |
+ source_filename = "llvm.amdgcn.ds.gws.sema.v.ll"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn"
+
+ @G = global <32 x i8> splat (i8 1)
+ @G.1 = global <32 x i8> splat (i8 127)
+
+ define amdgpu_kernel void @gws_sema_v_offset0(i32 %val) #0 {
+ %gws_sema_v_offset0.kernarg.segment = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %LGV1 = load <32 x i8>, ptr @G.1, align 32
+ %LGV = load <32 x i8>, ptr @G, align 32
+ call void @llvm.amdgcn.ds.gws.sema.v(i32 0)
+ %C = icmp ne <32 x i8> %LGV, %LGV1
+ store <32 x i1> %C, ptr poison, align 4
+ ret void
+ }
+
+ declare void @llvm.amdgcn.ds.gws.sema.v(i32) #1
+
+ declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
+
+ attributes #0 = { convergent nounwind memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
+ attributes #1 = { convergent nocallback nofree nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
+ attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+...
+---
+name: gws_sema_v_offset0
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: false
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 2, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 3, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 4, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 5, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 6, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 7, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 8, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 9, class: sreg_64, preferred-register: '', flags: [ ] }
+ - { id: 10, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 11, class: vreg_128_align2, preferred-register: '', flags: [ ] }
+ - { id: 12, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 13, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 14, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 15, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 16, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 17, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 18, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 19, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 20, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 21, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 22, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 23, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 24, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 25, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 26, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 27, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 28, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 29, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 30, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 31, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 32, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 33, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 34, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 35, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 36, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 37, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 38, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 39, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 40, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 41, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 42, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 43, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 44, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 45, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 46, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 47, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 48, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 49, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 50, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 51, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 52, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 53, class: vreg_128_align2, preferred-register: '', flags: [ ] }
+ - { id: 54, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 55, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 56, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 57, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 58, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 59, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 60, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 61, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 62, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 63, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 64, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 65, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 66, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 67, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 68, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 69, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 70, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 71, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 72, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 73, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 74, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 75, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 76, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 77, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 78, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 79, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 80, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 81, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 82, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 83, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 84, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 85, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 86, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 87, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 88, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 89, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 90, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 91, class: sreg_64, preferred-register: '', flags: [ ] }
+ - { id: 92, class: sreg_64_xexec, preferred-register: '', flags: [ ] }
+ - { id: 93, class: vreg_128_align2, preferred-register: '', flags: [ ] }
+ - { id: 94, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 95, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 96, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 97, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 98, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 99, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 100, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 101, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 102, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 103, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 104, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 105, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 106, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 107, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 108, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 109, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 110, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 111, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 112, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 113, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 114, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 115, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 116, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 117, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 118, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 119, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 120, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 121, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 122, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 123, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 124, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 125, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 126, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 127, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 128, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 129, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 130, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 131, class: vreg_128_align2, preferred-register: '', flags: [ ] }
+ - { id: 132, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 133, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 134, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 135, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 136, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 137, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 138, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 139, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 140, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 141, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 142, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 143, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 144, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 145, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 146, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 147, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 148, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 149, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 150, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 151, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 152, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 153, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 154, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 155, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 156, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 157, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 158, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 159, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 160, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 161, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 162, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 163, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 164, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 165, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 166, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 167, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 168, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 169, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 170, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 171, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 172, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 173, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 174, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 175, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 176, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 177, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 178, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 179, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 180, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 181, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 182, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 183, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 184, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 185, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 186, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 187, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 188, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 189, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 190, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 191, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 192, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 193, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 194, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 195, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 196, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 197, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 198, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 199, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 200, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 201, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 202, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 203, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 204, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 205, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 206, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 207, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 208, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 209, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 210, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 211, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 212, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 213, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 214, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 215, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 216, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 217, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 218, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 219, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 220, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 221, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 222, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 223, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 224, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 225, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 226, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 227, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 228, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 229, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 230, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 231, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 232, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 233, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 234, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 235, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 236, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 237, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 238, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 239, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 240, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 241, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 242, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 243, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 244, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 245, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 246, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 247, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 248, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 249, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 250, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 251, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 252, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 253, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 254, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 255, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 256, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 257, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 258, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 259, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 260, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 261, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 262, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 263, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 264, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 265, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 266, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 267, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 268, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 269, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 270, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 271, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 272, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 273, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 274, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 275, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 276, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 277, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 278, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 279, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 280, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 281, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 282, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 283, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 284, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 285, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 286, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 287, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 288, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 289, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 290, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 291, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 292, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 293, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 294, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 295, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 296, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 297, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 298, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 299, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 300, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 301, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 302, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 303, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 304, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 305, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 306, class: sreg_64_xexec, preferred-register: '$vcc', flags: [ ] }
+ - { id: 307, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 308, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 309, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 310, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 311, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 312, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 313, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 314, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 315, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 316, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 317, class: sreg_64, preferred-register: '', flags: [ ] }
+ - { id: 318, class: vreg_64_align2, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 4
+ maxKernArgAlign: 4
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: true
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ dispatchPtr: { reg: '$sgpr0_sgpr1' }
+ queuePtr: { reg: '$sgpr2_sgpr3' }
+ kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+ dispatchID: { reg: '$sgpr6_sgpr7' }
+ workGroupIDX: { reg: '$sgpr8' }
+ workGroupIDY: { reg: '$sgpr9' }
+ workGroupIDZ: { reg: '$sgpr10' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr11' }
+ workItemIDX: { reg: '$vgpr0', mask: 1023 }
+ workItemIDY: { reg: '$vgpr0', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr0', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+body: |
+ bb.0 (%ir-block.0):
+ ; CHECK-LABEL: name: gws_sema_v_offset0
+ ; CHECK: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @G.1, target-flags(amdgpu-gotprel32-hi) @G.1, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[S_LOAD_DWORDX2_IMM]]
+ ; CHECK-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[COPY]], 16, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G.1 + 16)
+ ; CHECK-NEXT: [[FLAT_LOAD_DWORDX4_1:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G.1, align 32)
+ ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET1:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @G, target-flags(amdgpu-gotprel32-hi) @G, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET1]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_LOAD_DWORDX2_IMM1]]
+ ; CHECK-NEXT: [[FLAT_LOAD_DWORDX4_2:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[COPY1]], 16, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G + 16)
+ ; CHECK-NEXT: [[FLAT_LOAD_DWORDX4_3:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G, align 32)
+ ; CHECK-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_3:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_3:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_4:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_4:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_1]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_5:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_1]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_5:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_1]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_1]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_1]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_6:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_1]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_6:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_1]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_1]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_1]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_7:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_1]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_7:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_1]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_1]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_1]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_8:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_2]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_8:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_2]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_16:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_2]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_17:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_2]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_9:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_2]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_9:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_2]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_18:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_2]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_19:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_2]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_10:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_2]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_10:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_2]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_2]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_21:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_2]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_11:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_2]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_11:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_2]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_22:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_2]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_23:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_2]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_12:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_3]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_12:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_3]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_24:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_3]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_25:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_3]].sub0, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_13:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_3]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_13:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_3]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_26:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_3]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_27:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_3]].sub1, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_14:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_3]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_14:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_3]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_28:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_3]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_29:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_3]].sub2, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_15:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[FLAT_LOAD_DWORDX4_3]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B16_e32_15:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 8, [[FLAT_LOAD_DWORDX4_3]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_30:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 16, [[FLAT_LOAD_DWORDX4_3]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_LSHRREV_B32_e32_31:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORDX4_3]].sub3, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_15]], [[V_LSHRREV_B16_e32_7]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_16:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_14]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_17:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_30]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_17]], [[V_AND_B32_e32_16]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_18:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_19:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_2]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_20:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_4]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_21:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_6]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_22:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_8]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_23:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_10]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_24:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_12]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_25:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_16]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_26:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_18]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_27:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_20]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_28:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_22]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_29:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_24]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_30:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_26]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_31:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_LSHRREV_B32_e32_28]], implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 0
+ ; CHECK-NEXT: BUNDLE implicit $m0, implicit $exec {
+ ; CHECK-NEXT: DS_GWS_SEMA_V 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource")
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_31]], [[V_LSHRREV_B32_e32_15]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_]], [[V_LSHLREV_B16_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_15]], [[V_AND_B32_e32_7]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_3]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_3]], [[V_LSHLREV_B16_e32_2]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_32:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_32]], [[V_OR_B32_e32_]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_3:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 12, [[V_OR_B32_e32_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_29]], [[V_LSHRREV_B32_e32_13]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_4]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_4:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_31]], [[V_AND_B32_e32_24]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_5:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_4]], [[V_LSHLREV_B16_e32_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_6:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_14]], [[V_LSHRREV_B16_e32_6]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_6:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_7:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_14]], [[V_AND_B32_e32_6]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_7]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_7]], [[V_LSHLREV_B16_e32_6]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_33:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_4]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_33]], [[V_OR_B32_e32_3]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_34:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[V_OR_B32_e32_5]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_7:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 8, [[V_AND_B32_e32_34]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_3]], [[V_LSHLREV_B16_e32_7]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_8:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_27]], [[V_LSHRREV_B32_e32_11]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_8]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_8:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_8]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_9:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_30]], [[V_AND_B32_e32_23]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_9]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_9:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_9]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_8]], [[V_LSHLREV_B16_e32_9]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_10:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_13]], [[V_LSHRREV_B16_e32_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_10]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_10:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_10]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_11:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_13]], [[V_AND_B32_e32_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_11]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_11]], [[V_LSHLREV_B16_e32_10]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_35:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_8]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_35]], [[V_OR_B32_e32_7]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_11:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 4, [[V_OR_B32_e32_9]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_12:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_25]], [[V_LSHRREV_B32_e32_9]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_12]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_12:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_12]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_13:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_29]], [[V_AND_B32_e32_22]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_13]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_13:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_13]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_12]], [[V_LSHLREV_B16_e32_13]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_14:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_12]], [[V_LSHRREV_B16_e32_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_14:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_14]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_14:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_14]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_15:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_12]], [[V_AND_B32_e32_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_15:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_15]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_15]], [[V_LSHLREV_B16_e32_14]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_36:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_11]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_36]], [[V_OR_B32_e32_10]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_37:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[V_OR_B32_e32_12]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_37]], [[V_LSHLREV_B16_e32_11]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_38:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_OR_B32_e32_13]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_38]], [[V_OR_B32_e32_6]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_39:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[V_OR_B32_e32_14]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_16:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_23]], [[V_LSHRREV_B32_e32_7]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_16:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_16]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_15:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_16]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_17:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_28]], [[V_AND_B32_e32_21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_17:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_17]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_16:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_17]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_15]], [[V_LSHLREV_B16_e32_16]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_18:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_11]], [[V_LSHRREV_B16_e32_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_18:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_18]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_17:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_18]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_19:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_11]], [[V_AND_B32_e32_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_19:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_19]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_19]], [[V_LSHLREV_B16_e32_17]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_40:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_16]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_40]], [[V_OR_B32_e32_15]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_18:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 12, [[V_OR_B32_e32_17]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_20:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_21]], [[V_LSHRREV_B32_e32_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_20:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_20]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_19:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_20]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_21:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_27]], [[V_AND_B32_e32_20]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_21:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_21]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_20:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_21]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_19]], [[V_LSHLREV_B16_e32_20]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_22:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_10]], [[V_LSHRREV_B16_e32_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_22:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_22]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_21:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_22]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_23:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_10]], [[V_AND_B32_e32_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_23:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_23]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_23]], [[V_LSHLREV_B16_e32_21]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_41:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_19]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_41]], [[V_OR_B32_e32_18]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_42:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[V_OR_B32_e32_20]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_22:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 8, [[V_AND_B32_e32_42]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_18]], [[V_LSHLREV_B16_e32_22]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_24:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_19]], [[V_LSHRREV_B32_e32_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_24:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_24]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_23:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_24]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_25:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_26]], [[V_AND_B32_e32_19]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_25:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_25]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_24:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_25]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_23]], [[V_LSHLREV_B16_e32_24]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_26:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_9]], [[V_LSHRREV_B16_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_26:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_26]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_25:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_26]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_27:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_9]], [[V_AND_B32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_27:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_27]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_27]], [[V_LSHLREV_B16_e32_25]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_43:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_23]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_43]], [[V_OR_B32_e32_22]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_26:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 4, [[V_OR_B32_e32_24]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_28:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B32_e32_17]], [[V_LSHRREV_B32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_28:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_28]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_27:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[V_CNDMASK_B32_e64_28]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_29:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_25]], [[V_AND_B32_e32_18]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_29:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_29]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_28:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[V_CNDMASK_B32_e64_29]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_LSHLREV_B16_e32_27]], [[V_LSHLREV_B16_e32_28]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_30:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_LSHRREV_B16_e32_8]], [[V_LSHRREV_B16_e32_]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_30:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_30]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B16_e32_29:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[V_CNDMASK_B32_e64_30]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U16_e64_31:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U16_e64 [[V_AND_B32_e32_8]], [[V_AND_B32_e32_]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_31:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NE_U16_e64_31]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_CNDMASK_B32_e64_31]], [[V_LSHLREV_B16_e32_29]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_44:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, [[V_OR_B32_e32_26]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_44]], [[V_OR_B32_e32_25]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_45:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 15, [[V_OR_B32_e32_27]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_45]], [[V_LSHLREV_B16_e32_26]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_46:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 255, [[V_OR_B32_e32_28]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_46]], [[V_OR_B32_e32_21]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 16, [[V_OR_B32_e32_29]], implicit $exec
+ ; CHECK-NEXT: [[V_OR_B32_e32_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_AND_B32_e32_39]], [[V_LSHLREV_B32_e32_]], implicit $exec
+ ; CHECK-NEXT: FLAT_STORE_DWORD undef %318:vreg_64_align2, [[V_OR_B32_e32_30]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %9:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @G.1, target-flags(amdgpu-gotprel32-hi) @G.1, implicit-def dead $scc
+ %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %9, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+ %12:vreg_64_align2 = COPY %10
+ %11:vreg_128_align2 = FLAT_LOAD_DWORDX4 %12, 16, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G.1 + 16)
+ %16:vgpr_32 = V_AND_B32_e32 255, %11.sub0, implicit $exec
+ %18:vgpr_32 = V_LSHRREV_B16_e32 8, %11.sub0, implicit $exec
+ %20:vgpr_32 = V_LSHRREV_B32_e32 16, %11.sub0, implicit $exec
+ %22:vgpr_32 = V_AND_B32_e32 255, %20, implicit $exec
+ %24:vgpr_32 = V_LSHRREV_B32_e32 24, %11.sub0, implicit $exec
+ %28:vgpr_32 = V_AND_B32_e32 255, %11.sub1, implicit $exec
+ %29:vgpr_32 = V_LSHRREV_B16_e32 8, %11.sub1, implicit $exec
+ %30:vgpr_32 = V_LSHRREV_B32_e32 16, %11.sub1, implicit $exec
+ %32:vgpr_32 = V_AND_B32_e32 255, %30, implicit $exec
+ %33:vgpr_32 = V_LSHRREV_B32_e32 24, %11.sub1, implicit $exec
+ %37:vgpr_32 = V_AND_B32_e32 255, %11.sub2, implicit $exec
+ %38:vgpr_32 = V_LSHRREV_B16_e32 8, %11.sub2, implicit $exec
+ %39:vgpr_32 = V_LSHRREV_B32_e32 16, %11.sub2, implicit $exec
+ %41:vgpr_32 = V_AND_B32_e32 255, %39, implicit $exec
+ %42:vgpr_32 = V_LSHRREV_B32_e32 24, %11.sub2, implicit $exec
+ %46:vgpr_32 = V_AND_B32_e32 255, %11.sub3, implicit $exec
+ %47:vgpr_32 = V_LSHRREV_B16_e32 8, %11.sub3, implicit $exec
+ %48:vgpr_32 = V_LSHRREV_B32_e32 16, %11.sub3, implicit $exec
+ %50:vgpr_32 = V_AND_B32_e32 255, %48, implicit $exec
+ %51:vgpr_32 = V_LSHRREV_B32_e32 24, %11.sub3, implicit $exec
+ %53:vreg_128_align2 = FLAT_LOAD_DWORDX4 %12, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G.1, align 32)
+ %57:vgpr_32 = V_AND_B32_e32 255, %53.sub0, implicit $exec
+ %58:vgpr_32 = V_LSHRREV_B16_e32 8, %53.sub0, implicit $exec
+ %59:vgpr_32 = V_LSHRREV_B32_e32 16, %53.sub0, implicit $exec
+ %61:vgpr_32 = V_AND_B32_e32 255, %59, implicit $exec
+ %62:vgpr_32 = V_LSHRREV_B32_e32 24, %53.sub0, implicit $exec
+ %66:vgpr_32 = V_AND_B32_e32 255, %53.sub1, implicit $exec
+ %67:vgpr_32 = V_LSHRREV_B16_e32 8, %53.sub1, implicit $exec
+ %68:vgpr_32 = V_LSHRREV_B32_e32 16, %53.sub1, implicit $exec
+ %70:vgpr_32 = V_AND_B32_e32 255, %68, implicit $exec
+ %71:vgpr_32 = V_LSHRREV_B32_e32 24, %53.sub1, implicit $exec
+ %75:vgpr_32 = V_AND_B32_e32 255, %53.sub2, implicit $exec
+ %76:vgpr_32 = V_LSHRREV_B16_e32 8, %53.sub2, implicit $exec
+ %77:vgpr_32 = V_LSHRREV_B32_e32 16, %53.sub2, implicit $exec
+ %79:vgpr_32 = V_AND_B32_e32 255, %77, implicit $exec
+ %80:vgpr_32 = V_LSHRREV_B32_e32 24, %53.sub2, implicit $exec
+ %84:vgpr_32 = V_AND_B32_e32 255, %53.sub3, implicit $exec
+ %85:vgpr_32 = V_LSHRREV_B16_e32 8, %53.sub3, implicit $exec
+ %86:vgpr_32 = V_LSHRREV_B32_e32 16, %53.sub3, implicit $exec
+ %88:vgpr_32 = V_AND_B32_e32 255, %86, implicit $exec
+ %89:vgpr_32 = V_LSHRREV_B32_e32 24, %53.sub3, implicit $exec
+ %91:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @G, target-flags(amdgpu-gotprel32-hi) @G, implicit-def dead $scc
+ %92:sreg_64_xexec = S_LOAD_DWORDX2_IMM %91, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+ %94:vreg_64_align2 = COPY %92
+ %93:vreg_128_align2 = FLAT_LOAD_DWORDX4 %94, 16, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G + 16)
+ %97:vgpr_32 = V_AND_B32_e32 255, %93.sub0, implicit $exec
+ %98:vgpr_32 = V_LSHRREV_B16_e32 8, %93.sub0, implicit $exec
+ %99:vgpr_32 = V_LSHRREV_B32_e32 16, %93.sub0, implicit $exec
+ %101:vgpr_32 = V_AND_B32_e32 255, %99, implicit $exec
+ %102:vgpr_32 = V_LSHRREV_B32_e32 24, %93.sub0, implicit $exec
+ %106:vgpr_32 = V_AND_B32_e32 255, %93.sub1, implicit $exec
+ %107:vgpr_32 = V_LSHRREV_B16_e32 8, %93.sub1, implicit $exec
+ %108:vgpr_32 = V_LSHRREV_B32_e32 16, %93.sub1, implicit $exec
+ %110:vgpr_32 = V_AND_B32_e32 255, %108, implicit $exec
+ %111:vgpr_32 = V_LSHRREV_B32_e32 24, %93.sub1, implicit $exec
+ %115:vgpr_32 = V_AND_B32_e32 255, %93.sub2, implicit $exec
+ %116:vgpr_32 = V_LSHRREV_B16_e32 8, %93.sub2, implicit $exec
+ %117:vgpr_32 = V_LSHRREV_B32_e32 16, %93.sub2, implicit $exec
+ $m0 = S_MOV_B32 0
+ %119:vgpr_32 = V_AND_B32_e32 255, %117, implicit $exec
+ %120:vgpr_32 = V_LSHRREV_B32_e32 24, %93.sub2, implicit $exec
+ %124:vgpr_32 = V_AND_B32_e32 255, %93.sub3, implicit $exec
+ %125:vgpr_32 = V_LSHRREV_B16_e32 8, %93.sub3, implicit $exec
+ %126:vgpr_32 = V_LSHRREV_B32_e32 16, %93.sub3, implicit $exec
+ %128:vgpr_32 = V_AND_B32_e32 255, %126, implicit $exec
+ %129:vgpr_32 = V_LSHRREV_B32_e32 24, %93.sub3, implicit $exec
+ %131:vreg_128_align2 = FLAT_LOAD_DWORDX4 %94, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s128) from @G, align 32)
+ %135:vgpr_32 = V_AND_B32_e32 255, %131.sub0, implicit $exec
+ %136:vgpr_32 = V_LSHRREV_B16_e32 8, %131.sub0, implicit $exec
+ %137:vgpr_32 = V_LSHRREV_B32_e32 16, %131.sub0, implicit $exec
+ %139:vgpr_32 = V_AND_B32_e32 255, %137, implicit $exec
+ %140:vgpr_32 = V_LSHRREV_B32_e32 24, %131.sub0, implicit $exec
+ %144:vgpr_32 = V_AND_B32_e32 255, %131.sub1, implicit $exec
+ %145:vgpr_32 = V_LSHRREV_B16_e32 8, %131.sub1, implicit $exec
+ %146:vgpr_32 = V_LSHRREV_B32_e32 16, %131.sub1, implicit $exec
+ %148:vgpr_32 = V_AND_B32_e32 255, %146, implicit $exec
+ %149:vgpr_32 = V_LSHRREV_B32_e32 24, %131.sub1, implicit $exec
+ %153:vgpr_32 = V_AND_B32_e32 255, %131.sub2, implicit $exec
+ %154:vgpr_32 = V_LSHRREV_B16_e32 8, %131.sub2, implicit $exec
+ %155:vgpr_32 = V_LSHRREV_B32_e32 16, %131.sub2, implicit $exec
+ %157:vgpr_32 = V_AND_B32_e32 255, %155, implicit $exec
+ %158:vgpr_32 = V_LSHRREV_B32_e32 24, %131.sub2, implicit $exec
+ %162:vgpr_32 = V_AND_B32_e32 255, %131.sub3, implicit $exec
+ %163:vgpr_32 = V_LSHRREV_B16_e32 8, %131.sub3, implicit $exec
+ %164:vgpr_32 = V_LSHRREV_B32_e32 16, %131.sub3, implicit $exec
+ %166:vgpr_32 = V_AND_B32_e32 255, %164, implicit $exec
+ %167:vgpr_32 = V_LSHRREV_B32_e32 24, %131.sub3, implicit $exec
+ BUNDLE implicit $m0, implicit $exec {
+ DS_GWS_SEMA_V 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource")
+ S_WAITCNT 0
+ }
+ %169:sreg_64_xexec = V_CMP_NE_U16_e64 %167, %89, implicit $exec
+ %170:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %169, implicit $exec
+ %172:vgpr_32 = V_LSHLREV_B16_e32 3, %170, implicit $exec
+ %173:sreg_64_xexec = V_CMP_NE_U16_e64 %166, %88, implicit $exec
+ %174:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %173, implicit $exec
+ %176:vgpr_32 = V_LSHLREV_B16_e32 2, %174, implicit $exec
+ %177:vgpr_32 = V_OR_B32_e32 %172, %176, implicit $exec
+ %178:sreg_64_xexec = V_CMP_NE_U16_e64 %163, %85, implicit $exec
+ %179:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %178, implicit $exec
+ %181:vgpr_32 = V_LSHLREV_B16_e32 1, %179, implicit $exec
+ %182:sreg_64_xexec = V_CMP_NE_U16_e64 %162, %84, implicit $exec
+ %183:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %182, implicit $exec
+ %184:vgpr_32 = V_OR_B32_e32 %183, %181, implicit $exec
+ %185:vgpr_32 = V_AND_B32_e32 3, %184, implicit $exec
+ %186:vgpr_32 = V_OR_B32_e32 %185, %177, implicit $exec
+ %188:vgpr_32 = V_LSHLREV_B16_e32 12, %186, implicit $exec
+ %189:sreg_64_xexec = V_CMP_NE_U16_e64 %158, %80, implicit $exec
+ %190:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %189, implicit $exec
+ %191:vgpr_32 = V_LSHLREV_B16_e32 3, %190, implicit $exec
+ %192:sreg_64_xexec = V_CMP_NE_U16_e64 %157, %79, implicit $exec
+ %193:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %192, implicit $exec
+ %194:vgpr_32 = V_LSHLREV_B16_e32 2, %193, implicit $exec
+ %195:vgpr_32 = V_OR_B32_e32 %191, %194, implicit $exec
+ %196:sreg_64_xexec = V_CMP_NE_U16_e64 %154, %76, implicit $exec
+ %197:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %196, implicit $exec
+ %198:vgpr_32 = V_LSHLREV_B16_e32 1, %197, implicit $exec
+ %199:sreg_64_xexec = V_CMP_NE_U16_e64 %153, %75, implicit $exec
+ %200:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %199, implicit $exec
+ %201:vgpr_32 = V_OR_B32_e32 %200, %198, implicit $exec
+ %202:vgpr_32 = V_AND_B32_e32 3, %201, implicit $exec
+ %203:vgpr_32 = V_OR_B32_e32 %202, %195, implicit $exec
+ %205:vgpr_32 = V_AND_B32_e32 15, %203, implicit $exec
+ %206:vgpr_32 = V_LSHLREV_B16_e32 8, %205, implicit $exec
+ %207:vgpr_32 = V_OR_B32_e32 %188, %206, implicit $exec
+ %208:sreg_64_xexec = V_CMP_NE_U16_e64 %149, %71, implicit $exec
+ %209:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %208, implicit $exec
+ %210:vgpr_32 = V_LSHLREV_B16_e32 3, %209, implicit $exec
+ %211:sreg_64_xexec = V_CMP_NE_U16_e64 %148, %70, implicit $exec
+ %212:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %211, implicit $exec
+ %213:vgpr_32 = V_LSHLREV_B16_e32 2, %212, implicit $exec
+ %214:vgpr_32 = V_OR_B32_e32 %210, %213, implicit $exec
+ %215:sreg_64_xexec = V_CMP_NE_U16_e64 %145, %67, implicit $exec
+ %216:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %215, implicit $exec
+ %217:vgpr_32 = V_LSHLREV_B16_e32 1, %216, implicit $exec
+ %218:sreg_64_xexec = V_CMP_NE_U16_e64 %144, %66, implicit $exec
+ %219:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %218, implicit $exec
+ %220:vgpr_32 = V_OR_B32_e32 %219, %217, implicit $exec
+ %221:vgpr_32 = V_AND_B32_e32 3, %220, implicit $exec
+ %222:vgpr_32 = V_OR_B32_e32 %221, %214, implicit $exec
+ %224:vgpr_32 = V_LSHLREV_B16_e32 4, %222, implicit $exec
+ %225:sreg_64_xexec = V_CMP_NE_U16_e64 %140, %62, implicit $exec
+ %226:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %225, implicit $exec
+ %227:vgpr_32 = V_LSHLREV_B16_e32 3, %226, implicit $exec
+ %228:sreg_64_xexec = V_CMP_NE_U16_e64 %139, %61, implicit $exec
+ %229:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %228, implicit $exec
+ %230:vgpr_32 = V_LSHLREV_B16_e32 2, %229, implicit $exec
+ %231:vgpr_32 = V_OR_B32_e32 %227, %230, implicit $exec
+ %232:sreg_64_xexec = V_CMP_NE_U16_e64 %136, %58, implicit $exec
+ %233:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %232, implicit $exec
+ %234:vgpr_32 = V_LSHLREV_B16_e32 1, %233, implicit $exec
+ %235:sreg_64_xexec = V_CMP_NE_U16_e64 %135, %57, implicit $exec
+ %236:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %235, implicit $exec
+ %237:vgpr_32 = V_OR_B32_e32 %236, %234, implicit $exec
+ %238:vgpr_32 = V_AND_B32_e32 3, %237, implicit $exec
+ %239:vgpr_32 = V_OR_B32_e32 %238, %231, implicit $exec
+ %240:vgpr_32 = V_AND_B32_e32 15, %239, implicit $exec
+ %241:vgpr_32 = V_OR_B32_e32 %240, %224, implicit $exec
+ %242:vgpr_32 = V_AND_B32_e32 255, %241, implicit $exec
+ %243:vgpr_32 = V_OR_B32_e32 %242, %207, implicit $exec
+ %245:vgpr_32 = V_AND_B32_e32 65535, %243, implicit $exec
+ %246:sreg_64_xexec = V_CMP_NE_U16_e64 %129, %51, implicit $exec
+ %247:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %246, implicit $exec
+ %248:vgpr_32 = V_LSHLREV_B16_e32 3, %247, implicit $exec
+ %249:sreg_64_xexec = V_CMP_NE_U16_e64 %128, %50, implicit $exec
+ %250:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %249, implicit $exec
+ %251:vgpr_32 = V_LSHLREV_B16_e32 2, %250, implicit $exec
+ %252:vgpr_32 = V_OR_B32_e32 %248, %251, implicit $exec
+ %253:sreg_64_xexec = V_CMP_NE_U16_e64 %125, %47, implicit $exec
+ %254:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %253, implicit $exec
+ %255:vgpr_32 = V_LSHLREV_B16_e32 1, %254, implicit $exec
+ %256:sreg_64_xexec = V_CMP_NE_U16_e64 %124, %46, implicit $exec
+ %257:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %256, implicit $exec
+ %258:vgpr_32 = V_OR_B32_e32 %257, %255, implicit $exec
+ %259:vgpr_32 = V_AND_B32_e32 3, %258, implicit $exec
+ %260:vgpr_32 = V_OR_B32_e32 %259, %252, implicit $exec
+ %261:vgpr_32 = V_LSHLREV_B16_e32 12, %260, implicit $exec
+ %262:sreg_64_xexec = V_CMP_NE_U16_e64 %120, %42, implicit $exec
+ %263:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %262, implicit $exec
+ %264:vgpr_32 = V_LSHLREV_B16_e32 3, %263, implicit $exec
+ %265:sreg_64_xexec = V_CMP_NE_U16_e64 %119, %41, implicit $exec
+ %266:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %265, implicit $exec
+ %267:vgpr_32 = V_LSHLREV_B16_e32 2, %266, implicit $exec
+ %268:vgpr_32 = V_OR_B32_e32 %264, %267, implicit $exec
+ %269:sreg_64_xexec = V_CMP_NE_U16_e64 %116, %38, implicit $exec
+ %270:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %269, implicit $exec
+ %271:vgpr_32 = V_LSHLREV_B16_e32 1, %270, implicit $exec
+ %272:sreg_64_xexec = V_CMP_NE_U16_e64 %115, %37, implicit $exec
+ %273:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %272, implicit $exec
+ %274:vgpr_32 = V_OR_B32_e32 %273, %271, implicit $exec
+ %275:vgpr_32 = V_AND_B32_e32 3, %274, implicit $exec
+ %276:vgpr_32 = V_OR_B32_e32 %275, %268, implicit $exec
+ %277:vgpr_32 = V_AND_B32_e32 15, %276, implicit $exec
+ %278:vgpr_32 = V_LSHLREV_B16_e32 8, %277, implicit $exec
+ %279:vgpr_32 = V_OR_B32_e32 %261, %278, implicit $exec
+ %280:sreg_64_xexec = V_CMP_NE_U16_e64 %111, %33, implicit $exec
+ %281:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %280, implicit $exec
+ %282:vgpr_32 = V_LSHLREV_B16_e32 3, %281, implicit $exec
+ %283:sreg_64_xexec = V_CMP_NE_U16_e64 %110, %32, implicit $exec
+ %284:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %283, implicit $exec
+ %285:vgpr_32 = V_LSHLREV_B16_e32 2, %284, implicit $exec
+ %286:vgpr_32 = V_OR_B32_e32 %282, %285, implicit $exec
+ %287:sreg_64_xexec = V_CMP_NE_U16_e64 %107, %29, implicit $exec
+ %288:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %287, implicit $exec
+ %289:vgpr_32 = V_LSHLREV_B16_e32 1, %288, implicit $exec
+ %290:sreg_64_xexec = V_CMP_NE_U16_e64 %106, %28, implicit $exec
+ %291:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %290, implicit $exec
+ %292:vgpr_32 = V_OR_B32_e32 %291, %289, implicit $exec
+ %293:vgpr_32 = V_AND_B32_e32 3, %292, implicit $exec
+ %294:vgpr_32 = V_OR_B32_e32 %293, %286, implicit $exec
+ %295:vgpr_32 = V_LSHLREV_B16_e32 4, %294, implicit $exec
+ %296:sreg_64_xexec = V_CMP_NE_U16_e64 %102, %24, implicit $exec
+ %297:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %296, implicit $exec
+ %298:vgpr_32 = V_LSHLREV_B16_e32 3, %297, implicit $exec
+ %299:sreg_64_xexec = V_CMP_NE_U16_e64 %101, %22, implicit $exec
+ %300:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %299, implicit $exec
+ %301:vgpr_32 = V_LSHLREV_B16_e32 2, %300, implicit $exec
+ %302:vgpr_32 = V_OR_B32_e32 %298, %301, implicit $exec
+ %303:sreg_64_xexec = V_CMP_NE_U16_e64 %98, %18, implicit $exec
+ %304:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %303, implicit $exec
+ %305:vgpr_32 = V_LSHLREV_B16_e32 1, %304, implicit $exec
+ %306:sreg_64_xexec = V_CMP_NE_U16_e64 %97, %16, implicit $exec
+ %307:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %306, implicit $exec
+ %308:vgpr_32 = V_OR_B32_e32 %307, %305, implicit $exec
+ %309:vgpr_32 = V_AND_B32_e32 3, %308, implicit $exec
+ %310:vgpr_32 = V_OR_B32_e32 %309, %302, implicit $exec
+ %311:vgpr_32 = V_AND_B32_e32 15, %310, implicit $exec
+ %312:vgpr_32 = V_OR_B32_e32 %311, %295, implicit $exec
+ %313:vgpr_32 = V_AND_B32_e32 255, %312, implicit $exec
+ %314:vgpr_32 = V_OR_B32_e32 %313, %279, implicit $exec
+ %315:vgpr_32 = V_LSHLREV_B32_e32 16, %314, implicit $exec
+ %316:vgpr_32 = V_OR_B32_e32 %245, %315, implicit $exec
+ FLAT_STORE_DWORD undef %318:vreg_64_align2, %316, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr poison`)
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle.ll b/llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle.ll
new file mode 100644
index 0000000000000..e2b5b85257dac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sema-v-unsched-bundle.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=amdgcn -O1 -mcpu=gfx90a -stop-after=machine-scheduler < %s | FileCheck %s
+
+; CHECK: BUNDLE implicit $m0, implicit $exec {
+; CHECK-NEXT: DS_GWS_SEMA_V 0, implicit $m0, implicit $exec :: (store (s32) into custom "GWSResource")
+; CHECK-NEXT: S_WAITCNT 0
+; CHECK-NEXT: }
+
+ at G = global <32 x i8> splat (i8 1)
+ at G.1 = global <32 x i8> splat (i8 127)
+
+define amdgpu_kernel void @gws_sema_v_offset0(i32 %val) #0 { ; reduced fuzzer reproducer: GWS bundle + revertScheduling
+  %LGV1 = load <32 x i8>, ptr @G.1, align 32 ; wide vector load; expanded into many scalar MIR ops, creating scheduling pressure
+  %LGV = load <32 x i8>, ptr @G, align 32 ; second wide load, ordered before the GWS intrinsic below
+  call void @llvm.amdgcn.ds.gws.sema.v(i32 0) ; lowers to a BUNDLE (DS_GWS_SEMA_V + S_WAITCNT) per the CHECK lines above
+  %C = icmp ne <32 x i8> %LGV, %LGV1 ; element-wise compare keeps both loads live across the bundle
+  store <32 x i1> %C, ptr poison, align 4 ; sink so the compare is not eliminated; address intentionally poison
+  ret void
+}
+
+declare void @llvm.amdgcn.ds.gws.sema.v(i32) #1
+
+attributes #0 = { convergent nounwind memory(inaccessiblemem: readwrite) }
+attributes #1 = { convergent nocallback nofree nounwind willreturn memory(inaccessiblemem: readwrite) }
+
More information about the llvm-commits
mailing list