[llvm] 505906b - [AMDGPU][True16][CodeGen] do not legalize t16 operand during user scan (#145450)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 24 20:49:26 PDT 2025


Author: Brox Chen
Date: 2025-06-24T23:49:22-04:00
New Revision: 505906bff6ddf9813666f0404fb604a2b6e02722

URL: https://github.com/llvm/llvm-project/commit/505906bff6ddf9813666f0404fb604a2b6e02722
DIFF: https://github.com/llvm/llvm-project/commit/505906bff6ddf9813666f0404fb604a2b6e02722.diff

LOG: [AMDGPU][True16][CodeGen] do not legalize t16 operand during user scan (#145450)

The legalize t16 operand function could insert a reg_sequence which
modify the user list of the targetted register, and we should not call
it in the middle of an user list iteration

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 333e91bf37df5..a2be3870bffca 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8716,9 +8716,8 @@ void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
 void SIInstrInfo::addUsersToMoveToVALUWorklist(
     Register DstReg, MachineRegisterInfo &MRI,
     SIInstrWorklist &Worklist) const {
-  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
-         E = MRI.use_end(); I != E;) {
-    MachineInstr &UseMI = *I->getParent();
+  for (MachineOperand &MO : make_early_inc_range(MRI.use_operands(DstReg))) {
+    MachineInstr &UseMI = *MO.getParent();
 
     unsigned OpNo = 0;
 
@@ -8733,21 +8732,15 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
     case AMDGPU::INSERT_SUBREG:
       break;
     default:
-      OpNo = I.getOperandNo();
+      OpNo = MO.getOperandNo();
       break;
     }
 
-    if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) {
+    if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo)))
       Worklist.insert(&UseMI);
-
-      do {
-        ++I;
-      } while (I != E && I->getParent() == &UseMI);
-    } else {
+    else
+      // Legalization could change user list.
       legalizeOperandsVALUt16(UseMI, OpNo, MRI);
-
-      ++I;
-    }
   }
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
index 9b6a2f3a1aa1e..59184499de2f0 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-true16.mir
@@ -108,6 +108,32 @@ body:             |
     %4:sreg_32 = S_FMAC_F16 %3:sreg_32, %3:sreg_32, %2:sreg_32, implicit $mode
 ...
 
+---
+name:            legalize_with_multi_user
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: legalize_with_multi_user
+    ; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF1]], %subreg.hi16
+    ; GCN-NEXT: [[V_ADD_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_ADD_F16_t16_e64 0, [[REG_SEQUENCE]].lo16, 0, 1, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_ADD_F16_t16_e64_]], %subreg.lo16, [[DEF3]], %subreg.hi16
+    ; GCN-NEXT: [[V_PK_FMA_F16_:%[0-9]+]]:vgpr_32 = V_PK_FMA_F16 11, [[S_MOV_B32_]], 0, [[REG_SEQUENCE1]], 8, [[DEF2]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_ADD_F16_t16_e64_]], %subreg.lo16, [[DEF4]], %subreg.hi16
+    ; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE2]], [[S_MOV_B32_]], implicit $exec
+    %0:vgpr_16 = IMPLICIT_DEF
+    %1:sreg_32 = COPY %0:vgpr_16
+    %2:sreg_32 = S_ADD_F16 %1:sreg_32, 1, implicit $mode
+    %3:sreg_32 = S_MOV_B32 32768
+    %4:vgpr_32 = IMPLICIT_DEF
+    %5:vgpr_32 = V_PK_FMA_F16 11, %3:sreg_32, 0, %2:sreg_32, 8, %4:vgpr_32, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+    %6:sreg_32 = S_XOR_B32 %2:sreg_32, %3:sreg_32, implicit-def dead $scc
+...
+
 ---
 name:            vgpr16_to_spgr32
 body:             |


        


More information about the llvm-commits mailing list