[PATCH] D109889: AMDGPU: Broadcast scalar boolean to vector boolean explicitly

Ruiling, Song via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 29 19:19:08 PDT 2021


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG52785989e95d: AMDGPU: Broadcast scalar boolean to vector boolean explicitly (authored by ruiling).

Changed prior to commit:
  https://reviews.llvm.org/D109889?vs=375882&id=376090#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D109889/new/

https://reviews.llvm.org/D109889

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll


Index: llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
+++ llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
@@ -15,7 +15,7 @@
 ; GFX7-NEXT:    s_cmp_lg_u32 s4, 0
 ; GFX7-NEXT:    s_addc_u32 s4, s6, 0
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s4
-; GFX7-NEXT:    s_cselect_b64 vcc, 1, 0
+; GFX7-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GFX7-NEXT:    s_cmp_gt_u32 s6, 31
 ; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX7-NEXT:    s_cselect_b64 vcc, -1, 0
@@ -32,7 +32,7 @@
 ; GFX9-NEXT:    s_cmp_lg_u64 s[4:5], 0
 ; GFX9-NEXT:    s_addc_u32 s4, s6, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s4
-; GFX9-NEXT:    s_cselect_b64 vcc, 1, 0
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
 ; GFX9-NEXT:    s_cmp_gt_u32 s6, 31
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
@@ -49,7 +49,7 @@
 ; GFX10-NEXT:    v_add_co_u32 v0, s5, s4, s4
 ; GFX10-NEXT:    s_cmpk_lg_u32 s5, 0x0
 ; GFX10-NEXT:    s_addc_u32 s5, s4, 0
-; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
+; GFX10-NEXT:    s_cselect_b32 s6, -1, 0
 ; GFX10-NEXT:    s_cmp_gt_u32 s4, 31
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, s5, s6
 ; GFX10-NEXT:    s_cselect_b32 vcc_lo, -1, 0
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4138,7 +4138,10 @@
     }
 
     const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
-    if (TRI->getRegSizeInBits(*Src2RC) == 64) {
+    unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
+    assert(WaveSize == 64 || WaveSize == 32);
+
+    if (WaveSize == 64) {
       if (ST.hasScalarCompareEq64()) {
         BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
             .addReg(Src2.getReg())
@@ -4168,8 +4171,13 @@
 
     BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
 
-    BuildMI(*BB, MII, DL, TII->get(AMDGPU::COPY), CarryDest.getReg())
-      .addReg(AMDGPU::SCC);
+    unsigned SelOpc =
+        (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+
+    BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
+        .addImm(-1)
+        .addImm(0);
+
     MI.eraseFromParent();
     return BB;
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D109889.376090.patch
Type: text/x-patch
Size: 2420 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210930/a3d6853a/attachment.bin>


More information about the llvm-commits mailing list