[llvm] aec971a - Revert "[AMDGPU] Wave32 CodeGen for amdgcn.ballot.i64"

Thu Jul 13 06:52:47 PDT 2023

Author: pvanhout
Date: 2023-07-13T15:52:27+02:00
New Revision: aec971adec540411446d8f2893e6882e54001e44

URL: https://github.com/llvm/llvm-project/commit/aec971adec540411446d8f2893e6882e54001e44
DIFF: https://github.com/llvm/llvm-project/commit/aec971adec540411446d8f2893e6882e54001e44.diff

LOG: Revert "[AMDGPU] Wave32 CodeGen for amdgcn.ballot.i64"

This reverts commit cfa2d0a3aa0beb5422107dc9943cb0eae6d93896.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/VOPCInstructions.td

Removed: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 2aa9d9a7f04eae..c00095eb82638f 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1326,45 +1326,27 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
   Register DstReg = I.getOperand(0).getReg();
   const unsigned Size = MRI->getType(DstReg).getSizeInBits();
   const bool Is64 = Size == 64;
-  const bool IsWave32 = (STI.getWavefrontSize() == 32);
 
-  // In the common case, the return type matches the wave size.
-  // However we also support emitting i64 ballots in wave32 mode.
-  if (Size != STI.getWavefrontSize() && (!Is64 || !IsWave32))
+  if (Size != STI.getWavefrontSize())
     return false;
 
   std::optional<ValueAndVReg> Arg =
       getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
 
-  const auto BuildCopy = [&](Register SrcReg) {
-    if (Size == STI.getWavefrontSize()) {
-      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
-          .addReg(SrcReg)
-          ->dump();
-      return;
-    }
-
-    // If emitting a i64 ballot in wave32, fill the upper bits with zeroes.
-    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
-        .addReg(SrcReg)
-        .addImm(AMDGPU::sub0)
-        .addReg(HiReg)
-        .addImm(AMDGPU::sub1);
-  };
-
   if (Arg) {
     const int64_t Value = Arg->Value.getSExtValue();
     if (Value == 0) {
       unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
       BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
-    } else if (Value == -1) // all ones
-      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
-    else
+    } else if (Value == -1) { // all ones
+      Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
+    } else
       return false;
-  } else
-    BuildCopy(I.getOperand(2).getReg());
+  } else {
+    Register SrcReg = I.getOperand(2).getReg();
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
+  }
 
   I.eraseFromParent();
   return true;

diff  --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index f1c14460e891fc..9e11775fa9f08e 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -992,18 +992,11 @@ multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
     (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
   >;
 
-  let WaveSizePredicate = isWave32 in {
-    def : GCNPat <
-      (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
-      (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
-    >;
-
-    // Support codegen of i64 setcc in wave32 mode.
-    def : GCNPat <
-      (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
-      (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
-    >;
-  }
+  let WaveSizePredicate = isWave32 in
+  def : GCNPat <
+    (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+    (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+  >;
 }
 
 defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
@@ -1063,22 +1056,13 @@ multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
                            DSTCLAMP.NONE), SReg_64))
   >;
 
-  let WaveSizePredicate = isWave32 in {
-    def : GCNPat <
-      (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
-                        (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
-      (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-                              DSTCLAMP.NONE), SReg_32))
-    >;
-
-    def : GCNPat <
-      (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
-                        (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
-      (i64 (REG_SEQUENCE SReg_64, (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-                                   DSTCLAMP.NONE), sub0,
-                                  (S_MOV_B32 (i32 0)), sub1))
-    >;
-  }
+  let WaveSizePredicate = isWave32 in
+  def : GCNPat <
+    (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                 (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+    (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                           DSTCLAMP.NONE), SReg_32))
+  >;
 }
 
 defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
deleted file mode 100644
index 109a345d7a2c88..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -global-isel=0 -mcpu=gfx1010 < %s | FileCheck %s --check-prefixes=CHECK,DAGISEL
-; RUN: llc -march=amdgcn -global-isel=0 -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck %s --check-prefixes=CHECK,DAGISEL
-; RUN: llc -march=amdgcn -global-isel -mcpu=gfx1010 < %s | FileCheck %s --check-prefixes=CHECK,GISEL
-; RUN: llc -march=amdgcn -global-isel -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck %s --check-prefixes=CHECK,GISEL
-
-declare i64 @llvm.amdgcn.ballot.i64(i1)
-declare i64 @llvm.ctpop.i64(i64)
-
-; Test ballot(0)
-
-define amdgpu_cs i64 @constant_false() {
-; CHECK-LABEL: constant_false:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    s_mov_b32 s1, 0
-; CHECK-NEXT:    ; return to shader part epilog
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0)
-  ret i64 %ballot
-}
-
-; Test ballot(1)
-
-define amdgpu_cs i64 @constant_true() {
-; DAGISEL-LABEL: constant_true:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
-; DAGISEL-NEXT:    s_mov_b32 s1, exec_hi
-; DAGISEL-NEXT:    ; return to shader part epilog
-;
-; GISEL-LABEL: constant_true:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GISEL-NEXT:    s_mov_b32 s1, 0
-; GISEL-NEXT:    ; return to shader part epilog
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
-  ret i64 %ballot
-}
-
-; Test ballot of a non-comparison operation
-
-define amdgpu_cs i64 @non_compare(i32 %x) {
-; CHECK-LABEL: non_compare:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    s_mov_b32 s1, 0
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
-; CHECK-NEXT:    ; return to shader part epilog
-  %trunc = trunc i32 %x to i1
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc)
-  ret i64 %ballot
-}
-
-; Test ballot of comparisons
-
-define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
-; CHECK-LABEL: compare_ints:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
-; CHECK-NEXT:    s_mov_b32 s1, 0
-; CHECK-NEXT:    ; return to shader part epilog
-  %cmp = icmp eq i32 %x, %y
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
-  ret i64 %ballot
-}
-
-define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
-; DAGISEL-LABEL: compare_int_with_constant:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
-; DAGISEL-NEXT:    s_mov_b32 s1, 0
-; DAGISEL-NEXT:    ; return to shader part epilog
-;
-; GISEL-LABEL: compare_int_with_constant:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
-; GISEL-NEXT:    s_mov_b32 s1, 0
-; GISEL-NEXT:    ; return to shader part epilog
-  %cmp = icmp sge i32 %x, 99
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
-  ret i64 %ballot
-}
-
-define amdgpu_cs i64 @compare_floats(float %x, float %y) {
-; CHECK-LABEL: compare_floats:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
-; CHECK-NEXT:    s_mov_b32 s1, 0
-; CHECK-NEXT:    ; return to shader part epilog
-  %cmp = fcmp ogt float %x, %y
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
-  ret i64 %ballot
-}
-
-define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) {
-; CHECK-LABEL: ctpop_of_ballot:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
-; CHECK-NEXT:    s_mov_b32 s1, 0
-; CHECK-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
-; CHECK-NEXT:    ; return to shader part epilog
-  %cmp = fcmp ogt float %x, %y
-  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
-  %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot)
-  ret i64 %bcnt
-}