[llvm] e84182a - [X86][Inline] Skip inline asm in inlining target feature check (#83820)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 5 05:21:37 PST 2024


Author: Nikita Popov
Date: 2024-03-05T14:21:33+01:00
New Revision: e84182af919d136d74b75ded4d599b38fb47dfb0

URL: https://github.com/llvm/llvm-project/commit/e84182af919d136d74b75ded4d599b38fb47dfb0
DIFF: https://github.com/llvm/llvm-project/commit/e84182af919d136d74b75ded4d599b38fb47dfb0.diff

LOG: [X86][Inline] Skip inline asm in inlining target feature check (#83820)

When inlining across functions with different target features, we
perform roughly two checks:
 1. The caller features must be a superset of the callee features.
2. Calls in the callee cannot use types where the target features would
change the call ABI (e.g. by changing whether something is passed in a
zmm or two ymm registers). The latter check is very crude right now.

The latter check currently also catches inline asm "calls". I believe
that inline asm should be excluded from this check, as it is independent
from the usual call ABI, and instead governed by the inline asm
constraint string.

Fixes https://github.com/llvm/llvm-project/issues/67054.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 18bf32fe1acaad..4cca291a245622 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6087,6 +6087,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
 
   for (const Instruction &I : instructions(Callee)) {
     if (const auto *CB = dyn_cast<CallBase>(&I)) {
+      // Having more target features is fine for inline ASM.
+      if (CB->isInlineAsm())
+        continue;
+
       SmallVector<Type *, 8> Types;
       for (Value *Arg : CB->args())
         Types.push_back(Arg->getType());

diff  --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
index f03270bafea999..6f582cab2f1452 100644
--- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
+++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll
@@ -94,27 +94,22 @@ define internal void @caller_not_avx4() {
 
 declare i64 @caller_unknown_simple(i64)
 
-; FIXME: This call should get inlined, because the callee only contains
+; This call should get inlined, because the callee only contains
 ; inline ASM, not real calls.
 define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm
 ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:    [[CALL:%.*]] = call <8 x i64> @callee_inline_asm(ptr [[P0]], i64 [[K]], ptr [[P1]], ptr [[P2]])
-; CHECK-NEXT:    ret <8 x i64> [[CALL]]
+; CHECK-NEXT:    [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64
+; CHECK-NEXT:    [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64
+; CHECK-NEXT:    [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]])
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2)
   ret <8 x i64> %call
 }
 
 define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 {
-; CHECK-LABEL: define {{[^@]+}}@callee_inline_asm
-; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT:    [[SRC:%.*]] = load <8 x i64>, ptr [[P0]], align 64
-; CHECK-NEXT:    [[A:%.*]] = load <8 x i64>, ptr [[P1]], align 64
-; CHECK-NEXT:    [[B:%.*]] = load <8 x i64>, ptr [[P2]], align 64
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A]], <8 x i64> [[B]], <8 x i64> [[SRC]])
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
   %src = load <8 x i64>, ptr %p0, align 64
   %a = load <8 x i64>, ptr %p1, align 64
   %b = load <8 x i64>, ptr %p2, align 64


        


More information about the llvm-commits mailing list