[PATCH] D80158: [AMDGPU] Fix for the lost CarryOut/CarryIn register operands in S_ADD/SUB_CO_PSEUDO.

Alexander via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed May 27 13:04:12 PDT 2020


This revision was automatically updated to reflect the committed changes.
Closed by commit rGeb1092ada32d: [AMDGPU] Fix for the lost CarryOut/CarryIn register operands in S_ADD/SUB_CO_PSEUDO. (authored by alex-t).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80158/new/

https://reviews.llvm.org/D80158

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir


Index: llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fix-sgpr-copies  %s -o - | FileCheck -check-prefix=GCN %s
+---
+name:            s_add_co_pseudo_test
+tracksRegLiveness: true
+body:             |
+
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2
+    ; GCN-LABEL: name: s_add_co_pseudo_test
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GCN: [[COPY6:%[0-9]+]]:sgpr_32 = COPY [[COPY3]]
+    ; GCN: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY4]], implicit $exec
+    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 killed [[V_MUL_LO_U32_]], [[COPY6]], 0, implicit $exec
+    ; GCN: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY4]], [[COPY5]]
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167
+    ; GCN: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY3]], implicit $exec
+    ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]]
+    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_1]], [[COPY7]], [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY4]], [[V_ADDC_U32_e64_]], implicit $exec
+    ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736
+    ; GCN: [[V_MUL_LO_U32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_MUL_HI_U32_]], [[S_MOV_B32_1]], implicit $exec
+    ; GCN: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]]
+    ; GCN: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY8]], killed [[V_MUL_LO_U32_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %6:sreg_32 = COPY %0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = COPY $vgpr2
+    %3:sreg_32 = COPY $sgpr0
+    %4:sreg_32 = COPY $sgpr1
+    %5:sreg_32 = COPY $sgpr2
+    %20:vgpr_32 = COPY %3
+    %7:sreg_32 = S_MUL_I32 %6, %4
+    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 killed %7, %20, 0, implicit $exec
+    %8:sreg_32 = S_MUL_HI_U32 %4, %5
+    %11:sreg_32 = S_MOV_B32 -614296167
+    %12:sreg_32 = S_MUL_I32 %6, %3
+    %14:sreg_32, %13:sreg_64_xexec = S_ADD_CO_PSEUDO killed %12, killed %11, killed %10, implicit-def dead $scc
+    %15:sreg_32 = S_MUL_HI_U32 %4, %14
+    %16:sreg_32 = S_MOV_B32 -181084736
+    %17:sreg_32 = S_MUL_I32 %15, %16
+    %19:sreg_32, %18:sreg_64_xexec = S_ADD_CO_PSEUDO killed %16, killed %17, killed %13, implicit-def dead $scc
+...
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5248,18 +5248,24 @@
                          ? AMDGPU::V_ADDC_U32_e64
                          : AMDGPU::V_SUBB_U32_e64;
       const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
-      Register DummyCReg = MRI.createVirtualRegister(CarryRC);
-      Register CarryReg = MRI.createVirtualRegister(CarryRC);
+
+      Register CarryInReg = Inst.getOperand(4).getReg();
+      if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
+        Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
+        BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg)
+            .addReg(CarryInReg);
+      }
+
+      Register CarryOutReg = Inst.getOperand(1).getReg();
+
       Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
           MRI.getRegClass(Inst.getOperand(0).getReg())));
-      BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), CarryReg)
-          .addReg(Inst.getOperand(4).getReg());
       MachineInstr *CarryOp =
           BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg)
-              .addReg(DummyCReg, RegState::Define | RegState::Dead)
+              .addReg(CarryOutReg, RegState::Define)
               .add(Inst.getOperand(2))
               .add(Inst.getOperand(3))
-              .addReg(CarryReg, RegState::Kill)
+              .addReg(CarryInReg)
               .addImm(0);
       legalizeOperands(*CarryOp);
       MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80158.266640.patch
Type: text/x-patch
Size: 4910 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200527/696ea0b4/attachment.bin>


More information about the llvm-commits mailing list