[PATCH] D109889: AMDGPU: Broadcast scalar boolean to vector boolean explicitly
Ruiling, Song via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 29 07:28:14 PDT 2021
ruiling updated this revision to Diff 375882.
ruiling added a comment.
Remember the wave size in a local variable and reuse it when selecting the S_CSELECT opcode.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D109889/new/
https://reviews.llvm.org/D109889
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
Index: llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
+++ llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
@@ -14,7 +14,7 @@
; GFX7-NEXT: s_or_b32 s4, s4, s5
; GFX7-NEXT: s_cmp_lg_u32 s4, 0
; GFX7-NEXT: s_addc_u32 s4, s6, 0
-; GFX7-NEXT: s_cselect_b64 vcc, 1, 0
+; GFX7-NEXT: s_cselect_b64 vcc, -1, 0
; GFX7-NEXT: v_mov_b32_e32 v1, s4
; GFX7-NEXT: s_cmp_gt_u32 s6, 31
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -31,7 +31,7 @@
; GFX9-NEXT: v_add_co_u32_e64 v0, s[4:5], s6, s6
; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-NEXT: s_addc_u32 s4, s6, 0
-; GFX9-NEXT: s_cselect_b64 vcc, 1, 0
+; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: s_cmp_gt_u32 s6, 31
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -49,7 +49,7 @@
; GFX10-NEXT: v_add_co_u32 v0, s5, s4, s4
; GFX10-NEXT: s_cmpk_lg_u32 s5, 0x0
; GFX10-NEXT: s_addc_u32 s5, s4, 0
-; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: s_cselect_b32 s6, -1, 0
; GFX10-NEXT: s_cmp_gt_u32 s4, 31
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, s5, s6
; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4118,6 +4118,7 @@
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
? AMDGPU::S_ADDC_U32
: AMDGPU::S_SUBB_U32;
+
if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
@@ -4138,7 +4139,10 @@
}
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
- if (TRI->getRegSizeInBits(*Src2RC) == 64) {
+ unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
+ assert(WaveSize == 64 || WaveSize == 32);
+
+ if (WaveSize == 64) {
if (ST.hasScalarCompareEq64()) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
.addReg(Src2.getReg())
@@ -4168,8 +4172,13 @@
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::COPY), CarryDest.getReg())
- .addReg(AMDGPU::SCC);
+ unsigned SelOpc =
+ (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+
+ BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
+ .addImm(-1)
+ .addImm(0);
+
MI.eraseFromParent();
return BB;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D109889.375882.patch
Type: text/x-patch
Size: 2829 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210929/65ded6e2/attachment.bin>
More information about the llvm-commits mailing list