[llvm] 40202b1 - [AMDGPU] Legalize operands of V_ADDC_U32_e32 and friends

Tue Aug 3 01:05:00 PDT 2021

Author: Jay Foad
Date: 2021-08-03T09:04:52+01:00
New Revision: 40202b13b23290a6e20900896838c2dbbfb281bd

URL: https://github.com/llvm/llvm-project/commit/40202b13b23290a6e20900896838c2dbbfb281bd
DIFF: https://github.com/llvm/llvm-project/commit/40202b13b23290a6e20900896838c2dbbfb281bd.diff

LOG: [AMDGPU] Legalize operands of V_ADDC_U32_e32 and friends

These instructions have an implicit use of vcc, which counts towards the
constant bus limit. Pre-gfx10, this means that the explicit operands
cannot be SGPRs. Use the custom inserter hook to call legalizeOperands
to enforce that restriction.

Fixes https://bugs.llvm.org/show_bug.cgi?id=51217

Differential Revision: https://reviews.llvm.org/D106868

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/test/CodeGen/AMDGPU/uaddo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3479f56e71b5..c3d9ea4381c2 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4261,6 +4261,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     MI.eraseFromParent();
     return BB;
   }
+  case AMDGPU::V_ADDC_U32_e32:
+  case AMDGPU::V_SUBB_U32_e32:
+  case AMDGPU::V_SUBBREV_U32_e32:
+    // These instructions have an implicit use of vcc which counts towards the
+    // constant bus limit.
+    TII->legalizeOperands(MI);
+    return BB;
   case AMDGPU::DS_GWS_INIT:
   case AMDGPU::DS_GWS_SEMA_BR:
   case AMDGPU::DS_GWS_BARRIER:

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 7860b7e7f8a6..813e362b723b 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -188,7 +188,7 @@ multiclass VOP2bInst <string opName,
       let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
         def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                    Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
-          let usesCustomInserter = !eq(P.NumSrcArgs, 2);
+          let usesCustomInserter = true;
         }
 
         foreach _ = BoolToList<P.HasExtSDWA>.ret in

diff  --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll
index d867ceb3008a..1c76a1f0340b 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll
@@ -220,10 +220,24 @@ exit:
   ret void
 }
 
+; FUNC-LABEL: {{^}}sv_uaddo_i128:
+; GCN: v_add
+; GCN: v_addc
+; GCN: v_addc
+; GCN: v_addc
+define amdgpu_cs void @sv_uaddo_i128(i32 addrspace(1)* %out, i128 inreg %a, i128 %b) {
+  %uadd = call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %a, i128 %b)
+  %carry = extractvalue { i128, i1 } %uadd, 1
+  %carry.ext = zext i1 %carry to i32
+  store i32 %carry.ext, i32 addrspace(1)* %out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
 declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
+declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) #1
 declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone