[llvm] 4b980cc - [GlobalISel][InlineAsm] Add support for matching input constraints

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 30 01:50:07 PDT 2020


Author: Petar Avramovic
Date: 2020-06-30T10:49:05+02:00
New Revision: 4b980cc9ca08a0b95b4ac6994770155a48881729

URL: https://github.com/llvm/llvm-project/commit/4b980cc9ca08a0b95b4ac6994770155a48881729
DIFF: https://github.com/llvm/llvm-project/commit/4b980cc9ca08a0b95b4ac6994770155a48881729.diff

LOG: [GlobalISel][InlineAsm] Add support for matching input constraints

Find the def operand that corresponds to the matching constraint and
tie the input to that operand.

Differential Revision: https://reviews.llvm.org/D82651
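
For readers new to the syntax: a matching input constraint is a digit in the
constraint string that ties an input to an output operand, so both are
assigned the same register. A minimal IR example, lifted from the tests added
below, where "0" ties the second input to output operand $0:

    %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,0"(i32 %asm0, i32 %asm1)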

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll

Modified: 
    llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 3ac52b8e3e73..1950a4e8b763 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -232,6 +232,11 @@ static void computeConstraintToUse(const TargetLowering *TLI,
   }
 }
 
+static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
+  unsigned Flag = I.getOperand(OpIdx).getImm();
+  return InlineAsm::getNumOperandRegisters(Flag);
+}
+
 bool InlineAsmLowering::lowerInlineAsm(
     MachineIRBuilder &MIRBuilder, const CallBase &Call,
     std::function<ArrayRef<Register>(const Value &Val)> GetOrCreateVRegs)
@@ -317,6 +322,10 @@ bool InlineAsmLowering::lowerInlineAsm(
                   .addExternalSymbol(IA->getAsmString().c_str())
                   .addImm(ExtraInfo.get());
 
+  // Starting from this operand: flag followed by register(s) will be added as
+  // operands to Inst for each constraint. Used for matching input constraints.
+  unsigned StartIdx = Inst->getNumOperands();
+
   // Collects the output operands for later processing
   GISelAsmOperandInfoVector OutputOperands;
 
@@ -390,8 +399,31 @@ bool InlineAsmLowering::lowerInlineAsm(
       break;
     case InlineAsm::isInput: {
       if (OpInfo.isMatchingInputConstraint()) {
-        LLVM_DEBUG(dbgs() << "Tied input operands not supported yet\n");
-        return false;
+        unsigned DefIdx = OpInfo.getMatchedOperand();
+        // Find operand with register def that corresponds to DefIdx.
+        unsigned InstFlagIdx = StartIdx;
+        for (unsigned i = 0; i < DefIdx; ++i)
+          InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1;
+        assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag");
+
+        // We want to tie input to register in next operand.
+        unsigned DefRegIdx = InstFlagIdx + 1;
+        Register Def = Inst->getOperand(DefRegIdx).getReg();
+
+        // Copy input to new vreg with same reg class as Def
+        const TargetRegisterClass *RC = MRI->getRegClass(Def);
+        ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
+        assert(SrcRegs.size() == 1 && "Single register is expected here");
+        Register Tmp = MRI->createVirtualRegister(RC);
+        MIRBuilder.buildCopy(Tmp, SrcRegs[0]);
+
+        // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def.
+        unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
+        unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
+        Inst.addImm(Flag);
+        Inst.addReg(Tmp);
+        Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
+        break;
       }
 
       if (OpInfo.ConstraintType == TargetLowering::C_Other &&

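A sketch of the flag-word encoding the new code relies on (not part of the
commit; the constants are cross-checked against the irtranslator test further
below, and llvm/IR/InlineAsm.h is the authoritative reference). Each
constraint adds one immediate flag operand followed by its register operands,
which is what getNumOpRegs steps over, and getFlagWordForMatchingOp marks a
use as tied by setting bit 31 and storing the matched operand index at bit 16:

    #include "llvm/IR/InlineAsm.h"
    using namespace llvm;

    // Kind_RegUse (1) with one register: the kind sits in bits 0-2 and the
    // register count in the bits above, i.e. 1 | (1 << 3) == 9, printed as
    // "9 /* reguse */" in the MIR checks.
    unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, /*NumOps=*/1);
    // Tied to output 0: 0x80000000 | (0 << 16) | 9 == 2147483657,
    // printed as "reguse tiedto:$0".
    unsigned TiedTo0 = InlineAsm::getFlagWordForMatchingOp(UseFlag, /*MatchedOperandNo=*/0);
    // Tied to output 2: 0x80000000 | (2 << 16) | 9 == 2147614729 ("tiedto:$2").
    unsigned TiedTo2 = InlineAsm::getFlagWordForMatchingOp(UseFlag, /*MatchedOperandNo=*/2);
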
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
new file mode 100644
index 000000000000..587f808bc55e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -O0 -global-isel -verify-machineinstrs -o - %s | FileCheck %s
+
+define i32 @test_sgpr_reg_class_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_reg_class_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s5, 8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_add_u32 s4, s4, s5
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
+  %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,s"(i32 %asm0, i32 %asm1) nounwind
+  ret i32 %asm2
+}
+
+define i32 @test_sgpr_matching_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_matching_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s5, 8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_add_u32 s5, s4, s5
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
+  %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,0"(i32 %asm0, i32 %asm1) nounwind
+  ret i32 %asm2
+}
+
+define i32 @test_sgpr_to_vgpr_move_reg_class_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_to_vgpr_move_reg_class_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    v_mov_b32 v0, s4
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,s"(i32 %asm0) nounwind
+  ret i32 %asm1
+}
+
+define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_to_vgpr_move_matching_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    v_mov_b32 v0, v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind
+  ret i32 %asm1
+}
+
+!0 = !{i32 70}

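The effect of the tie is visible in the checks above: with plain
register-class constraints ("=s,s,s") the output may get its own register,
while with "=s,s,0" the matched input and the output must share one, so the
generated add both reads and writes s5:

    ; "=s,s,s" (no tie):           s_add_u32 s4, s4, s5
    ; "=s,s,0" ($2 tied to $0):    s_add_u32 s5, s4, s5
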
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index 15320c9155ef..7dc247de3687 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -234,4 +234,96 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind {
   ret i32 %1
 }
 
+define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
+  ; CHECK-LABEL: name: test_vgpr_matching_constraint
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
+  ; CHECK:   INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY %4
+  ; CHECK:   $vgpr0 = COPY [[COPY3]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  %and = and i32 %a, 1
+  %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and)
+  ret i32 %asm
+}
+
+define i32 @test_sgpr_matching_constraint() nounwind {
+  ; CHECK-LABEL: name: test_sgpr_matching_constraint
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+  ; CHECK:   INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY %3
+  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32)
+  ; CHECK:   INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 9 /* reguse */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
+  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY %5
+  ; CHECK:   $vgpr0 = COPY [[COPY5]](s32)
+  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
+  %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,0"(i32 %asm0, i32 %asm1) nounwind
+  ret i32 %asm2
+}
+
+define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
+  ; CHECK-LABEL: name: test_many_matching_constraints
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
+  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
+  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
+  ; CHECK:   INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY %4
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY %5
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY %6
+  ; CHECK:   G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK:   S_SETPC_B64_return [[COPY10]]
+  %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b)
+  %asmresult0 = extractvalue  {i32, i32, i32} %asm, 0
+  store i32 %asmresult0, i32 addrspace(1)* undef
+  %asmresult1 = extractvalue  {i32, i32, i32} %asm, 1
+  store i32 %asmresult1, i32 addrspace(1)* undef
+  %asmresult2 = extractvalue  {i32, i32, i32} %asm, 2
+  store i32 %asmresult2, i32 addrspace(1)* undef
+  ret void
+}
+
+define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
+  ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
+  ; CHECK:   INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY %3
+  ; CHECK:   $vgpr0 = COPY [[COPY3]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind
+  ret i32 %asm1
+}
+
 !0 = !{i32 70}

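A note on reading the MIR above: "(tied-def N)" counts MachineInstr operands,
not asm operand numbers. Operand 0 is the asm string and operand 1 the
extra-info immediate, so StartIdx in the new code is 2 (the first flag) and
the first output register sits at index 3. Decoded for
test_vgpr_matching_constraint:

    INLINEASM operand layout (MachineInstr operand indices):
      0: &";"          asm string
      1: 1             extra info (sideeffect)
      2: 1835018       flag: regdef:VGPR_32, one register
      3: def %4        output register -- the DefRegIdx tie target
      4: 2147483657    flag: reguse tiedto:$0
      5: [[COPY2]]     input register, tied to operand 3, hence "(tied-def 3)"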