[llvm] [Clang][inlineasm] Add special support for "rm" output constraints (PR #92040)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 13 15:16:02 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Bill Wendling (bwendling)

<details>
<summary>Changes</summary>

Clang isn't able to support multiple constraints on inputs and outputs.
Instead, it picks the "safest" one to use, i.e. the most conseravite. In
the case of "rm" it picks the memory constraint. This leads to obviously
horrible code:

  asm __volatile__ ("pushf\n\t"
                    "popq %0"
                    : "=rm" (x));

is compiled to:

	#APP
        pushf
	popq -8(%rsp)
	#NO_APP
	movq	-8(%rsp), %rax

Blech!

This hack^Wchange, makes a special exception for "rm" to use "r" if at
all possible. The "RegMayBeFolded" flag is then used by the Greedy
register allocator to allow for the old behavior if register pressure
is too great.

There are some major restrictions. First, this works only for x86
architectures. Each architecture have subtly different addressing modes,
making it hard to apply this change to all of them. An architecture
that needs this feature should look into implementing
"TargetInstrInfo::getFrameIndexOperands()."

Second, this feature works only with the Greedy register allocator. The
other register allocators default to the current behavior.

Fixes: https://github.com/llvm/llvm-project/issues/20571

Cc: Nick Desaulniers <ndesaulniers@<!-- -->google.com>
Cc: Kees Cook <keescook@<!-- -->google.com>
Cc: llvm@<!-- -->lists.linux.dev

---

Patch is 24.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92040.diff


8 Files Affected:

- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+5) 
- (modified) llvm/include/llvm/CodeGen/TargetPassConfig.h (+2) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+15-10) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (+3-2) 
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+29-1) 
- (modified) llvm/lib/CodeGen/TargetPassConfig.cpp (+6) 
- (added) llvm/test/CodeGen/X86/asm-constraints-rm.ll (+363) 
- (modified) llvm/test/CodeGen/X86/inlineasm-sched-bug.ll (+1-4) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 50a8c7eb75af5..ff321f6aa0f62 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4939,6 +4939,11 @@ class TargetLowering : public TargetLoweringBase {
     /// Memory, Other, Unknown.
     TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
 
+    /// The register may be folded. This is used if the constraint is "rm",
+    /// where we prefer using a register, but can fall back to a memory slot
+    /// under register pressure.
+    bool MayFoldRegister = false;
+
     /// If this is the result output operand or a clobber, this is null,
     /// otherwise it is the incoming operand to the CallInst.  This gets
     /// modified as the asm is processed.
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index d00e0bed91a45..c1f4199536409 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -496,6 +496,8 @@ class TargetPassConfig : public ImmutablePass {
 void registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
                              LLVMTargetMachine &);
 
+bool usesGreedyOrDefaultRegisterAllocator();
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_TARGETPASSCONFIG_H
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ca352da5d36eb..7bc03becf1a5a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1008,7 +1008,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
 }
 
 void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
-                                        unsigned MatchingIdx, const SDLoc &dl,
+                                        unsigned MatchingIdx,
+                                        bool MayFoldRegister, const SDLoc &dl,
                                         SelectionDAG &DAG,
                                         std::vector<SDValue> &Ops) const {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -1024,7 +1025,9 @@ void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
     // from the def.
     const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
     const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+
     Flag.setRegClass(RC->getID());
+    Flag.setRegMayBeFolded(MayFoldRegister);
   }
 
   SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
@@ -9775,8 +9778,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
         AsmNodeOperands.push_back(OpInfo.CallOperand);
       } else {
         // Otherwise, this outputs to a register (directly for C_Register /
-        // C_RegisterClass, and a target-defined fashion for
-        // C_Immediate/C_Other). Find a register that we can use.
+        // C_RegisterClass, and a target-defined fashion for C_Immediate /
+        // C_Other). Find a register that we can use.
         if (OpInfo.AssignedRegs.Regs.empty()) {
           emitInlineAsmError(
               Call, "couldn't allocate output register for constraint '" +
@@ -9792,7 +9795,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
         OpInfo.AssignedRegs.AddInlineAsmOperands(
             OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber
                                   : InlineAsm::Kind::RegDef,
-            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
+            false, 0, OpInfo.MayFoldRegister, getCurSDLoc(), DAG,
+            AsmNodeOperands);
       }
       break;
 
@@ -9834,9 +9838,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
           SDLoc dl = getCurSDLoc();
           // Use the produced MatchedRegs object to
           MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
-          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true,
-                                           OpInfo.getMatchedOperand(), dl, DAG,
-                                           AsmNodeOperands);
+          MatchedRegs.AddInlineAsmOperands(
+              InlineAsm::Kind::RegUse, true, OpInfo.getMatchedOperand(),
+              OpInfo.MayFoldRegister, dl, DAG, AsmNodeOperands);
           break;
         }
 
@@ -9965,7 +9969,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
                                         &Call);
 
       OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false,
-                                               0, dl, DAG, AsmNodeOperands);
+                                               0, OpInfo.MayFoldRegister, dl,
+                                               DAG, AsmNodeOperands);
       break;
     }
     case InlineAsm::isClobber:
@@ -9973,8 +9978,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
       // allocator is aware that the physreg got clobbered.
       if (!OpInfo.AssignedRegs.Regs.empty())
         OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber,
-                                                 false, 0, getCurSDLoc(), DAG,
-                                                 AsmNodeOperands);
+                                                 false, 0, false, getCurSDLoc(),
+                                                 DAG, AsmNodeOperands);
       break;
     }
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ae361f8c500a0..daf9cfbbe1279 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -783,8 +783,9 @@ struct RegsForValue {
   /// code marker, matching input operand index (if applicable), and includes
   /// the number of values added into it.
   void AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
-                            unsigned MatchingIdx, const SDLoc &dl,
-                            SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+                            unsigned MatchingIdx, bool MayFoldRegister,
+                            const SDLoc &dl, SelectionDAG &DAG,
+                            std::vector<SDValue> &Ops) const;
 
   /// Check if the total RegCount is greater than one.
   bool occupiesMultipleRegs() const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7beaeb9b7a171..cadb609ec72f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -33,6 +34,7 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
 #include <cctype>
 using namespace llvm;
 
@@ -5668,6 +5670,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
   unsigned ResNo = 0; // ResNo - The result number of the next output.
   unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
 
+  const Triple &T = getTargetMachine().getTargetTriple();
   for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
     ConstraintOperands.emplace_back(std::move(CI));
     AsmOperandInfo &OpInfo = ConstraintOperands.back();
@@ -5678,6 +5681,16 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
 
     OpInfo.ConstraintVT = MVT::Other;
 
+    // Special treatment for all platforms (currently only x86) that can fold a
+    // register into a spill. This is used for the "rm" constraint, where we
+    // would vastly prefer to use 'r' over 'm', but can't because of LLVM's
+    // architecture picks the most "conservative" constraint to ensure that (in
+    // the case of "rm") register pressure cause bad things to happen.
+    if (T.isX86() && !OpInfo.hasMatchingInput() && OpInfo.Codes.size() == 2 &&
+        llvm::is_contained(OpInfo.Codes, "r") &&
+        llvm::is_contained(OpInfo.Codes, "m"))
+      OpInfo.MayFoldRegister = true;
+
     // Compute the value type for each operand.
     switch (OpInfo.Type) {
     case InlineAsm::isOutput:
@@ -5954,7 +5967,12 @@ TargetLowering::ConstraintWeight
 ///  1) If there is an 'other' constraint, and if the operand is valid for
 ///     that constraint, use it.  This makes us take advantage of 'i'
 ///     constraints when available.
-///  2) Otherwise, pick the most general constraint present.  This prefers
+///  2) Special processing is done for the "rm" constraint. If specified, we
+///     opt for the 'r' constraint, but mark the operand as being "foldable."
+///     In the face of register exhaustion, the register allocator is free to
+///     choose to use a stack slot. This only applies to the greedy and default
+///     register allocators. FIXME: Support other allocators (fast?).
+///  3) Otherwise, pick the most general constraint present.  This prefers
 ///     'm' over 'r', for example.
 ///
 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
@@ -5962,6 +5980,16 @@ TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
   ConstraintGroup Ret;
 
   Ret.reserve(OpInfo.Codes.size());
+
+  // If we can fold the register (i.e. it has an "rm" constraint), opt for the
+  // 'r' constraint, and allow the register allocator to spill if need be.
+  // Applies only to the greedy and default register allocators.
+  if (OpInfo.MayFoldRegister && usesGreedyOrDefaultRegisterAllocator()) {
+    Ret.emplace_back(ConstraintPair("r", getConstraintType("r")));
+    Ret.emplace_back(ConstraintPair("m", getConstraintType("m")));
+    return Ret;
+  }
+
   for (StringRef Code : OpInfo.Codes) {
     TargetLowering::ConstraintType CType = getConstraintType(Code);
 
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 8832b51333d91..b768cde55d79f 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1077,6 +1077,12 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
     RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
              cl::desc("Register allocator to use"));
 
+bool llvm::usesGreedyOrDefaultRegisterAllocator() {
+  return RegAlloc == (RegisterRegAlloc::
+                          FunctionPassCtor)&createGreedyRegisterAllocator ||
+         RegAlloc == &useDefaultRegisterAllocator;
+}
+
 /// Add the complete set of target-independent postISel code generator passes.
 ///
 /// This can be read as the standard order of major LLVM CodeGen stages. Stages
diff --git a/llvm/test/CodeGen/X86/asm-constraints-rm.ll b/llvm/test/CodeGen/X86/asm-constraints-rm.ll
new file mode 100644
index 0000000000000..f718f6b26abb3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/asm-constraints-rm.ll
@@ -0,0 +1,363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter "^\t#" --version 4
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O2 -regalloc=greedy < %s | FileCheck --check-prefix=GREEDY-X86_64 %s
+; RUN: llc -mtriple=i386-unknown-linux-gnu -O2 -regalloc=greedy   < %s | FileCheck --check-prefix=GREEDY-I386 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O2 -regalloc=basic  < %s | FileCheck --check-prefix=BASIC-X86_64 %s
+; RUN: llc -mtriple=i386-unknown-linux-gnu -O2 -regalloc=basic    < %s | FileCheck --check-prefix=BASIC-I386 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O2 -regalloc=fast   < %s | FileCheck --check-prefix=FAST-X86_64 %s
+; RUN: llc -mtriple=i386-unknown-linux-gnu -O2 -regalloc=fast     < %s | FileCheck --check-prefix=FAST-I386 %s
+
+; The Greedy register allocator should use registers when there isn't register
+; pressure.
+
+define dso_local i32 @test1(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test1:
+; GREEDY-X86_64:    #APP
+; GREEDY-X86_64:    # 'rm' input no pressure -> %eax %ecx
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test1:
+; GREEDY-I386:    #APP
+; GREEDY-I386:    # 'rm' input no pressure -> %ecx %edx
+; GREEDY-I386:    #NO_APP
+;
+; BASIC-X86_64-LABEL: test1:
+; BASIC-X86_64:    #APP
+; BASIC-X86_64:    # 'rm' input no pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
+; BASIC-X86_64:    #NO_APP
+;
+; BASIC-I386-LABEL: test1:
+; BASIC-I386:    #APP
+; BASIC-I386:    # 'rm' input no pressure -> {{[0-9]+}}(%esp) (%esp)
+; BASIC-I386:    #NO_APP
+;
+; FAST-X86_64-LABEL: test1:
+; FAST-X86_64:    #APP
+; FAST-X86_64:    # 'rm' input no pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
+; FAST-X86_64:    #NO_APP
+;
+; FAST-I386-LABEL: test1:
+; FAST-I386:    #APP
+; FAST-I386:    # 'rm' input no pressure -> {{[0-9]+}}(%esp) (%esp)
+; FAST-I386:    #NO_APP
+entry:
+  %b = getelementptr inbounds i8, ptr %ptr, i64 4
+  %0 = load i32, ptr %b, align 4
+  %d = getelementptr inbounds i8, ptr %ptr, i64 12
+  %1 = load i32, ptr %d, align 4
+  tail call void asm sideeffect "# 'rm' input no pressure -> $0 $1", "rm,rm,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) #1
+  %2 = load i32, ptr %ptr, align 4
+  ret i32 %2
+}
+
+define dso_local i32 @test2(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test2:
+; GREEDY-X86_64:    #APP # 8-byte Folded Reload
+; GREEDY-X86_64:    # 'rm' input pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test2:
+; GREEDY-I386:    #APP # 8-byte Folded Reload
+; GREEDY-I386:    # 'rm' input pressure -> {{[0-9]+}}(%esp) (%esp)
+; GREEDY-I386:    #NO_APP
+;
+; BASIC-X86_64-LABEL: test2:
+; BASIC-X86_64:    #APP
+; BASIC-X86_64:    # 'rm' input pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
+; BASIC-X86_64:    #NO_APP
+;
+; BASIC-I386-LABEL: test2:
+; BASIC-I386:    #APP
+; BASIC-I386:    # 'rm' input pressure -> {{[0-9]+}}(%esp) (%esp)
+; BASIC-I386:    #NO_APP
+;
+; FAST-X86_64-LABEL: test2:
+; FAST-X86_64:    #APP
+; FAST-X86_64:    # 'rm' input pressure -> -{{[0-9]+}}(%rsp) -{{[0-9]+}}(%rsp)
+; FAST-X86_64:    #NO_APP
+;
+; FAST-I386-LABEL: test2:
+; FAST-I386:    #APP
+; FAST-I386:    # 'rm' input pressure -> {{[0-9]+}}(%esp) {{[0-9]+}}(%esp)
+; FAST-I386:    #NO_APP
+entry:
+  %b = getelementptr inbounds i8, ptr %ptr, i64 4
+  %0 = load i32, ptr %b, align 4
+  %d = getelementptr inbounds i8, ptr %ptr, i64 12
+  %1 = load i32, ptr %d, align 4
+  tail call void asm sideeffect "# 'rm' input pressure -> $0 $1", "rm,rm,~{ax},~{cx},~{dx},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{bx},~{bp},~{r14},~{r15},~{r12},~{r13},~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) #1
+  %2 = load i32, ptr %ptr, align 4
+  ret i32 %2
+}
+
+define dso_local i32 @test3(ptr noundef %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test3:
+; GREEDY-X86_64:    #APP
+; GREEDY-X86_64:    # 'rm' output no pressure -> %eax %ecx
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test3:
+; GREEDY-I386:    #APP
+; GREEDY-I386:    # 'rm' output no pressure -> %ecx %edx
+; GREEDY-I386:    #NO_APP
+;
+; BASIC-X86_64-LABEL: test3:
+; BASIC-X86_64:    #APP
+; BASIC-X86_64:    # 'rm' output no pressure -> 4(%rdi) 12(%rdi)
+; BASIC-X86_64:    #NO_APP
+;
+; BASIC-I386-LABEL: test3:
+; BASIC-I386:    #APP
+; BASIC-I386:    # 'rm' output no pressure -> 4(%eax) 12(%eax)
+; BASIC-I386:    #NO_APP
+;
+; FAST-X86_64-LABEL: test3:
+; FAST-X86_64:    #APP
+; FAST-X86_64:    # 'rm' output no pressure -> 4(%rdi) 12(%rdi)
+; FAST-X86_64:    #NO_APP
+;
+; FAST-I386-LABEL: test3:
+; FAST-I386:    #APP
+; FAST-I386:    # 'rm' output no pressure -> 4(%eax) 12(%eax)
+; FAST-I386:    #NO_APP
+entry:
+  %b = getelementptr inbounds i8, ptr %ptr, i64 4
+  %d = getelementptr inbounds i8, ptr %ptr, i64 12
+  tail call void asm sideeffect "# 'rm' output no pressure -> $0 $1", "=*rm,=*rm,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %d) #1
+  %0 = load i32, ptr %ptr, align 4
+  ret i32 %0
+}
+
+define dso_local i32 @test4(ptr noundef %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test4:
+; GREEDY-X86_64:    #APP
+; GREEDY-X86_64:    # tied 'rm' no pressure -> %eax %ecx %eax %ecx
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test4:
+; GREEDY-I386:    #APP
+; GREEDY-I386:    # tied 'rm' no pressure -> %ecx %edx %ecx %edx
+; GREEDY-I386:    #NO_APP
+;
+; BASIC-X86_64-LABEL: test4:
+; BASIC-X86_64:    #APP
+; BASIC-X86_64:    # tied 'rm' no pressure -> %eax %ecx %eax %ecx
+; BASIC-X86_64:    #NO_APP
+;
+; BASIC-I386-LABEL: test4:
+; BASIC-I386:    #APP
+; BASIC-I386:    # tied 'rm' no pressure -> %eax %ecx %eax %ecx
+; BASIC-I386:    #NO_APP
+;
+; FAST-X86_64-LABEL: test4:
+; FAST-X86_64:    #APP
+; FAST-X86_64:    # tied 'rm' no pressure -> %ecx %eax %ecx %eax
+; FAST-X86_64:    #NO_APP
+;
+; FAST-I386-LABEL: test4:
+; FAST-I386:    #APP
+; FAST-I386:    # tied 'rm' no pressure -> %edx %ecx %edx %ecx
+; FAST-I386:    #NO_APP
+entry:
+  %b = getelementptr inbounds i8, ptr %ptr, i64 4
+  %0 = load i32, ptr %b, align 4
+  %d = getelementptr inbounds i8, ptr %ptr, i64 12
+  %1 = load i32, ptr %d, align 4
+  tail call void asm sideeffect "# tied 'rm' no pressure -> $0 $1 $2 $3", "=*rm,=*rm,0,1,~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32) %b, ptr nonnull elementtype(i32) %d, i32 %0, i32 %1) #1
+  %2 = load i32, ptr %ptr, align 4
+  ret i32 %2
+}
+
+define dso_local i32 @test5(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test5:
+; GREEDY-X86_64:    #APP
+; GREEDY-X86_64:    # 'rm' input -> %eax
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test5:
+; GREEDY-I386:    #APP
+; GREEDY-I386:    # 'rm' input -> %ecx
+; GREEDY-I386:    #NO_APP
+;
+; BASIC-X86_64-LABEL: test5:
+; BASIC-X86_64:    #APP
+; BASIC-X86_64:    # 'rm' input -> -{{[0-9]+}}(%rsp)
+; BASIC-X86_64:    #NO_APP
+;
+; BASIC-I386-LABEL: test5:
+; BASIC-I386:    #APP
+; BASIC-I386:    # 'rm' input -> (%esp)
+; BASIC-I386:    #NO_APP
+;
+; FAST-X86_64-LABEL: test5:
+; FAST-X86_64:    #APP
+; FAST-X86_64:    # 'rm' input -> -{{[0-9]+}}(%rsp)
+; FAST-X86_64:    #NO_APP
+;
+; FAST-I386-LABEL: test5:
+; FAST-I386:    #APP
+; FAST-I386:    # 'rm' input -> (%esp)
+; FAST-I386:    #NO_APP
+entry:
+  %b = getelementptr inbounds i8, ptr %ptr, i64 4
+  %0 = load i32, ptr %b, align 4
+  tail call void asm sideeffect "# 'rm' input -> $0", "rm,~{dirflag},~{fpsr},~{flags}"(i32 %0) #1
+  %1 = load i32, ptr %ptr, align 4
+  ret i32 %1
+}
+
+define dso_local i32 @test6(ptr nocapture noundef readonly %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test6:
+; GREEDY-X86_64:    #APP
+; GREEDY-X86_64:    # 'rm' and 'r' input -> %eax %ecx
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test6:
+; GREEDY-I386:    #APP
+; GREEDY-I386:    # 'rm' and 'r' input -> %ecx %edx
+; GREEDY-I386:    #NO_APP
+;
+; BASIC-X86_64-LABEL: test6:
+; BASIC-X86_64:    #APP
+; BASIC-X86_64:    # 'rm' and 'r' input -> -{{[0-9]+}}(%rsp) %ecx
+; BASIC-X86_64:    #NO_APP
+;
+; BASIC-I386-LABEL: test6:
+; BASIC-I386:    #APP
+; BASIC-I386:    # 'rm' and 'r' input -> (%esp) %ecx
+; BASIC-I386:    #NO_APP
+;
+; FAST-X86_64-LABEL: test6:
+; FAST-X86_64:    #APP
+; FAST-X86_64:    # 'rm' and 'r' input -> -{{[0-9]+}}(%rsp) %eax
+; FAST-X86_64:    #NO_APP
+;
+; FAST-I386-LABEL: test6:
+; FAST-I386:    #APP
+; FAST-I386:    # 'rm' and 'r' input -> (%esp) %ecx
+; FAST-I386:    #NO_APP
+entry:
+  %b = getelementptr inbounds i8, ptr %ptr, i64 4
+  %0 = load i32, ptr %b, align 4
+  %d = getelementptr inbounds i8, ptr %ptr, i64 12
+  %1 = load i32, ptr %d, align 4
+  tail call void asm sideeffect "# 'rm' and 'r' input -> $0 $1", "rm,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) #1
+  %2 = load i32, ptr %ptr, align 4
+  ret i32 %2
+}
+
+define dso_local i32 @test7(ptr noundef %ptr) local_unnamed_addr #0 {
+; GREEDY-X86_64-LABEL: test7:
+; GREEDY-X86_64:    #APP
+; GREEDY-X86_64:    # 'rm' output -> %eax
+; GREEDY-X86_64:    #NO_APP
+;
+; GREEDY-I386-LABEL: test7:
+; GREEDY-I386:    #APP
+; GREEDY-I386:  ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/92040


More information about the llvm-commits mailing list