[clang] [llvm] [X86] Support EGPR for inline assembly. (PR #92338)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Wed May 29 20:16:01 PDT 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/92338
>From 41fbc18c7a4a26b11bc4b772bbe2e384ad9d9dbc Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 10 May 2024 16:29:55 +0800
Subject: [PATCH 01/11] [X86] Support EGPR for inline assembly.
"jR": explictly enables EGPR
"r": enables/disables EGPR w/wo -mapx-inline-asm-use-gpr32
-mapx-inline-asm-use-gpr32 will also define a new Macro:
__APX_INLINE_ASM_USE_GPR32__
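A user-level sketch of what the two knobs look like from C (illustrative only, not part of the patch; the function name is hypothetical): "jR" explicitly opts an operand in to the APX extended GPRs, while -mapx-inline-asm-use-gpr32 widens what a plain "r" operand may receive and predefines the macro above.

/* Illustrative only, assuming the clang-side "jR" support added below.
 * With -mapx-features=egpr the operand may be allocated to r16-r31;
 * without the egpr feature, "jR" behaves like an ordinary "r" operand.
 */
static inline void add_to_rax(long v) {
  __asm__ volatile("addq %0, %%rax" : : "jR"(v) : "rax");
}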
---
clang/include/clang/Driver/Options.td | 2 +
clang/lib/Basic/Targets/X86.cpp | 26 +++++++++
clang/lib/Basic/Targets/X86.h | 1 +
clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 +
.../Driver/x86-apx-inline-asm-use-gpr32.cpp | 3 +
clang/test/Preprocessor/x86_target_features.c | 3 +
llvm/lib/Target/X86/X86.td | 3 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 57 +++++++++++++++++--
.../CodeGen/X86/inline-asm-jR-constraint.ll | 19 +++++++
.../CodeGen/X86/inline-asm-r-constraint.ll | 16 ++++++
10 files changed, 127 insertions(+), 5 deletions(-)
create mode 100644 clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp
create mode 100644 llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll
create mode 100644 llvm/test/CodeGen/X86/inline-asm-r-constraint.ll
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 73a2518480e9b..20a7c482bbf06 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6281,6 +6281,8 @@ def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group<m_x86_F
// we will add it to -mapxf.
def mapxf : Flag<["-"], "mapxf">, Alias<mapx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx", "ndd"]>;
def mno_apxf : Flag<["-"], "mno-apxf">, Alias<mno_apx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd"]>;
+def mapx_inline_asm_use_gpr32 : Flag<["-"], "mapx-inline-asm-use-gpr32">, Group<m_Group>,
+ HelpText<"Enable use of GPR32 in inline assembly for APX">;
} // let Flags = [TargetSpecific]
// VE feature flags
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 67e2126cf766b..9e61b6e6d6441 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -450,6 +450,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasFullBFloat16 = true;
} else if (Feature == "+egpr") {
HasEGPR = true;
+ } else if (Feature == "+inline-asm-use-gpr32") {
+ HasInlineAsmUseGPR32 = true;
} else if (Feature == "+push2pop2") {
HasPush2Pop2 = true;
} else if (Feature == "+ppx") {
@@ -974,6 +976,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
// Condition here is aligned with the feature set of mapxf in Options.td
if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD)
Builder.defineMacro("__APX_F__");
+ if (HasInlineAsmUseGPR32)
+ Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__");
// Each case falls through to the previous one here.
switch (SSELevel) {
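As a usage note (illustrative, not from the patch): the predefined macro lets hand-written assembly detect at preprocessing time that plain "r" operands may now be handed r16-r31, for example to reject the option in a file whose asm templates are not EGPR-clean.

/* Minimal sketch: refuse to build a translation unit whose inline asm
 * relies on legacy-only encodings when GPR32 use is enabled.
 */
#if defined(__APX_INLINE_ASM_USE_GPR32__)
#error "inline asm here is not EGPR-clean; build without -mapx-inline-asm-use-gpr32"
#endif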
@@ -1493,6 +1497,15 @@ bool X86TargetInfo::validateAsmConstraint(
case 'C': // SSE floating point constant.
case 'G': // x87 floating point constant.
return true;
+ case 'j':
+ Name++;
+ switch (*Name) {
+ default:
+ return false;
+ case 'R':
+ Info.setAllowsRegister();
+ return true;
+ }
case '@':
// CC condition changes.
if (auto Len = matchAsmCCConstraint(Name)) {
@@ -1764,6 +1777,19 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
// to the next constraint.
return std::string("^") + std::string(Constraint++, 2);
}
+ case 'j':
+ switch (Constraint[1]) {
+ default:
+ // Break from inner switch and fall through (copy single char),
+ // continue parsing after copying the current constraint into
+ // the return string.
+ break;
+ case 'R':
+ // "^" hints llvm that this is a 2 letter constraint.
+ // "Constraint++" is used to promote the string iterator
+ // to the next constraint.
+ return std::string("^") + std::string(Constraint++, 2);
+ }
[[fallthrough]];
default:
return std::string(1, *Constraint);
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index c14e4d5f433d8..69c68ee80f3ba 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -174,6 +174,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasNDD = false;
bool HasCCMP = false;
bool HasCF = false;
+ bool HasInlineAsmUseGPR32 = false;
protected:
llvm::X86::CPUKind CPU = llvm::X86::CK_None;
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index 53e26a9f8e229..085ff4824a9b0 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -309,4 +309,6 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
Features.push_back("+prefer-no-gather");
if (Args.hasArg(options::OPT_mno_scatter))
Features.push_back("+prefer-no-scatter");
+ if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32))
+ Features.push_back("+inline-asm-use-gpr32");
}
diff --git a/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp b/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp
new file mode 100644
index 0000000000000..a45140d96e66c
--- /dev/null
+++ b/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp
@@ -0,0 +1,3 @@
+/// Tests -mapx-inline-asm-use-gpr32
+// RUN: %clang -target x86_64-unknown-linux-gnu -c -mapx-inline-asm-use-gpr32 -### %s 2>&1 | FileCheck --check-prefix=GPR32 %s
+// GPR32: "-target-feature" "+inline-asm-use-gpr32"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 5602c59158fe5..7fe19598fbd4e 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -811,3 +811,6 @@
// NDD: #define __NDD__ 1
// PPX: #define __PPX__ 1
// PUSH2POP2: #define __PUSH2POP2__ 1
+
+// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s
+// USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 9f5b58d78fcce..90a0c878a1101 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -353,6 +353,9 @@ def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
"Support conditional cmp & test instructions">;
def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
"Support conditional faulting">;
+def FeatureUseGPR32InInlineAsm
+ : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "false",
+ "Enable use of GPR32 in inline assembly for APX">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0410cc33ca337..2f381a3fc117c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57574,6 +57574,13 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case '2':
return C_RegisterClass;
}
+ case 'j':
+ switch (Constraint[1]) {
+ default:
+ break;
+ case 'R':
+ return C_RegisterClass;
+ }
}
} else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
return C_Other;
@@ -57653,6 +57660,18 @@ X86TargetLowering::getSingleConstraintMatchWeight(
break;
}
break;
+ case 'j':
+ if (StringRef(Constraint).size() != 2)
+ break;
+ switch (Constraint[1]) {
+ default:
+ return CW_Invalid;
+ case 'R':
+ if (CallOperandVal->getType()->isIntegerTy())
+ Wt = CW_SpecificReg;
+ break;
+ }
+ break;
case 'v':
if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
Wt = CW_Register;
@@ -58015,16 +58034,28 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
- case 'l': // INDEX_REGS
+ case 'l': // INDEX_REGS{}
+ if (Subtarget.useInlineAsmGPR32()) {
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
+ if (VT == MVT::i32 || VT == MVT::f32 ||
+ (!VT.isVector() && !Subtarget.is64Bit()))
+ return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
+ if (VT != MVT::f80 && !VT.isVector())
+ return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
+ break;
+ }
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
+ return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
+ return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
+ return std::make_pair(0U, &X86::GR32RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
+ return std::make_pair(0U, &X86::GR64RegClass);
break;
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
@@ -58248,6 +58279,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
}
+ } else if (Constraint.size() == 2 && Constraint[0] == 'j') {
+ switch (Constraint[1]) {
+ default:
+ break;
+ case 'R':
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16RegClass);
+ if (VT == MVT::i32 || VT == MVT::f32 ||
+ (!VT.isVector() && !Subtarget.is64Bit()))
+ return std::make_pair(0U, &X86::GR32RegClass);
+ if (VT != MVT::f80 && !VT.isVector())
+ return std::make_pair(0U, &X86::GR64RegClass);
+ break;
+ }
}
if (parseConstraintCode(Constraint) != X86::COND_INVALID)
diff --git a/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll
new file mode 100644
index 0000000000000..2348e75433798
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s --check-prefix=EGPR
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s --check-prefix=EGPRUSEGPR32
+
+; ERR: error: inline assembly requires more registers than available
+
+define void @constraint_jR_test() nounwind "frame-pointer"="all" {
+; EGPR-LABEL: constraint_jR_test:
+; EGPR: addq %r16, %rax
+;
+; EGPRUSEGPR32-LABEL: constraint_jR_test:
+; EGPRUSEGPR32: addq %r16, %rax
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
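A C-level analogue of the test above (illustrative; it assumes the clang-side "jR" handling added earlier in this patch and is not itself part of the change):

/* With -mapx-features=egpr, the heavy clobber list leaves only r16-r31
 * free, so the "jR" operand is expected to pick an extended register,
 * matching the "addq %r16, %rax" check in the IR test.
 */
void constraint_jR_test(long v) {
  __asm__ volatile("addq %0, %%rax"
                   :
                   : "jR"(v)
                   : "rax", "rbx", "rcx", "rdx", "rdi", "rsi",
                     "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");
}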
diff --git a/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll
new file mode 100644
index 0000000000000..158dcdf9be272
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s --check-prefix=USEGPR32
+
+; ERR: error: inline assembly requires more registers than available
+
+define void @constraint_r_test() nounwind "frame-pointer"="all" {
+; USEGPR32-LABEL: constraint_r_test:
+; USEGPR32: addq %r16, %rax
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
>From c1daf204376b734bb2b527d646769d807f6b668c Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 16 May 2024 11:15:03 +0800
Subject: [PATCH 02/11] refine
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f381a3fc117c..5c7fd89299e9d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58034,7 +58034,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
case 'r': // GENERAL_REGS
- case 'l': // INDEX_REGS{}
+ case 'l': // INDEX_REGS
if (Subtarget.useInlineAsmGPR32()) {
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
>From 9f43b45b2f5985e7d6de5b2130710427dbf50ab9 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 16 May 2024 14:52:15 +0800
Subject: [PATCH 03/11] clang-format
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 26 ++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5c7fd89299e9d..36466876163c3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58281,19 +58281,19 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
} else if (Constraint.size() == 2 && Constraint[0] == 'j') {
switch (Constraint[1]) {
- default:
- break;
- case 'R':
- if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8RegClass);
- if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16RegClass);
- if (VT == MVT::i32 || VT == MVT::f32 ||
- (!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, &X86::GR32RegClass);
- if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64RegClass);
- break;
+ default:
+ break;
+ case 'R':
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16RegClass);
+ if (VT == MVT::i32 || VT == MVT::f32 ||
+ (!VT.isVector() && !Subtarget.is64Bit()))
+ return std::make_pair(0U, &X86::GR32RegClass);
+ if (VT != MVT::f80 && !VT.isVector())
+ return std::make_pair(0U, &X86::GR64RegClass);
+ break;
}
}
>From 1fdd2f23a05f679d8777176970217a68543a4a62 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 17 May 2024 09:26:37 +0800
Subject: [PATCH 04/11] add egpr restriction to the new option.
---
clang/lib/Basic/Targets/X86.cpp | 2 +-
clang/test/Preprocessor/x86_target_features.c | 4 +++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 9e61b6e6d6441..0114974637f7c 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -976,7 +976,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
// Condition here is aligned with the feature set of mapxf in Options.td
if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD)
Builder.defineMacro("__APX_F__");
- if (HasInlineAsmUseGPR32)
+ if (HasEGPR && HasInlineAsmUseGPR32)
Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__");
// Each case falls through to the previous one here.
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 7fe19598fbd4e..d2603fde01786 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -812,5 +812,7 @@
// PPX: #define __PPX__ 1
// PUSH2POP2: #define __PUSH2POP2__ 1
-// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s
+// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=NOUSEGPR32 %s
+// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=egpr -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s
+// NOUSEGPR32-NOT: #define __APX_INLINE_ASM_USE_GPR32__ 1
// USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1
>From 4d1ad3090416cda320c88f1ddc0937b5749e64b4 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 21 May 2024 10:21:08 +0800
Subject: [PATCH 05/11] address comments.
---
.../X86/{ => apx}/inline-asm-jR-constraint.ll | 0
.../X86/{ => apx}/inline-asm-r-constraint.ll | 13 +++++++++++++
2 files changed, 13 insertions(+)
rename llvm/test/CodeGen/X86/{ => apx}/inline-asm-jR-constraint.ll (100%)
rename llvm/test/CodeGen/X86/{ => apx}/inline-asm-r-constraint.ll (63%)
diff --git a/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
similarity index 100%
rename from llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll
rename to llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
diff --git a/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
similarity index 63%
rename from llvm/test/CodeGen/X86/inline-asm-r-constraint.ll
rename to llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
index 158dcdf9be272..955372dbc5f20 100644
--- a/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
@@ -14,3 +14,16 @@ entry:
call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}
+
+define void @constraint_jR_test() nounwind "frame-pointer"="all" {
+; EGPR-LABEL: constraint_jR_test:
+; EGPR: addq %r16, %rax
+;
+; EGPRUSEGPR32-LABEL: constraint_jR_test:
+; EGPRUSEGPR32: addq %r16, %rax
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
>From 40877041618aa8f472f0da7cda06c21f4007a1ec Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 21 May 2024 13:27:12 +0800
Subject: [PATCH 06/11] address comments.
---
llvm/docs/LangRef.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6f5a4644ffc2b..d322991eae29a 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5414,6 +5414,8 @@ X86:
operand will get allocated only to RAX -- if two 32-bit operands are needed,
you're better off splitting it yourself, before passing it to the asm
statement.
+- ``jR``: An 8, 16, 32, or 64-bit integer EGPR when EGPR feature is on.
+ Otherwise, same as ``R``.
XCore:
>From 0bb679457692d53c111d9543066f1a2a492db02a Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 22 May 2024 10:14:54 +0800
Subject: [PATCH 07/11] address comments
---
.../X86/apx/inline-asm-jR-constraint.ll | 11 ++++-------
.../X86/apx/inline-asm-r-constraint.ll | 19 +++----------------
2 files changed, 7 insertions(+), 23 deletions(-)
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
index 2348e75433798..805f1e68726ef 100644
--- a/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
@@ -1,16 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
-; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s --check-prefix=EGPR
-; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s --check-prefix=EGPRUSEGPR32
+; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
; ERR: error: inline assembly requires more registers than available
define void @constraint_jR_test() nounwind "frame-pointer"="all" {
-; EGPR-LABEL: constraint_jR_test:
-; EGPR: addq %r16, %rax
-;
-; EGPRUSEGPR32-LABEL: constraint_jR_test:
-; EGPRUSEGPR32: addq %r16, %rax
+; CHECK-LABEL: constraint_jR_test:
+; CHECK: addq %r16, %rax
entry:
%reg = alloca i64, align 8
%0 = load i64, ptr %reg, align 8
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
index 955372dbc5f20..54b55ef6e156c 100644
--- a/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
@@ -1,29 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
-; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s --check-prefix=USEGPR32
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
; ERR: error: inline assembly requires more registers than available
define void @constraint_r_test() nounwind "frame-pointer"="all" {
-; USEGPR32-LABEL: constraint_r_test:
-; USEGPR32: addq %r16, %rax
+; CHECK-LABEL: constraint_r_test:
+; CHECK: addq %r16, %rax
entry:
%reg = alloca i64, align 8
%0 = load i64, ptr %reg, align 8
call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}
-
-define void @constraint_jR_test() nounwind "frame-pointer"="all" {
-; EGPR-LABEL: constraint_jR_test:
-; EGPR: addq %r16, %rax
-;
-; EGPRUSEGPR32-LABEL: constraint_jR_test:
-; EGPRUSEGPR32: addq %r16, %rax
-entry:
- %reg = alloca i64, align 8
- %0 = load i64, ptr %reg, align 8
- call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
- ret void
-}
>From 4a9bf69673a2da02293aa3cf9cab54fbc98a89a2 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 23 May 2024 09:26:03 +0800
Subject: [PATCH 08/11] address
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 29 ++++++++++---------------
1 file changed, 12 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 36466876163c3..de084f3c3b285 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58035,28 +58035,23 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
- if (Subtarget.useInlineAsmGPR32()) {
- if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
- if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
- if (VT == MVT::i32 || VT == MVT::f32 ||
- (!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
- if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
- break;
- }
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8RegClass);
+ return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
+ ? &X86::GR8_NOREX2RegClass
+ : &X86::GR8RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16RegClass);
+ return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
+ ? &X86::GR16_NOREX2RegClass
+ : &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, &X86::GR32RegClass);
+ return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
+ ? &X86::GR32_NOREX2RegClass
+ : &X86::GR32RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64RegClass);
- break;
+ return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
+ ? &X86::GR64_NOREX2RegClass
+ : &X86::GR64RegClass);
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
>From bf3a53c0b7fc1828572f771e4772d25062110dc0 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 29 May 2024 17:19:50 +0800
Subject: [PATCH 09/11] address comments.
---
clang/lib/Basic/Targets/X86.cpp | 4 ++
llvm/docs/LangRef.rst | 12 ++--
llvm/lib/Target/X86/X86.td | 2 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 55 +++++++++++++------
.../X86/apx/inline-asm-jR-constraint.ll | 4 +-
.../X86/apx/inline-asm-jr-constraint.ll | 13 +++++
.../X86/apx/inline-asm-jr-err-constraint.ll | 14 +++++
.../X86/apx/inline-asm-r-constraint.ll | 16 ------
.../X86/apx/inline-asm-r-l-q-constraint.ll | 38 +++++++++++++
9 files changed, 117 insertions(+), 41 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll
create mode 100644 llvm/test/CodeGen/X86/apx/inline-asm-jr-err-constraint.ll
delete mode 100644 llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
create mode 100644 llvm/test/CodeGen/X86/apx/inline-asm-r-l-q-constraint.ll
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index cf671a3a9c948..b74e6ac7af56f 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1487,6 +1487,9 @@ bool X86TargetInfo::validateAsmConstraint(
switch (*Name) {
default:
return false;
+ case 'r':
+ Info.setAllowsRegister();
+ return true;
case 'R':
Info.setAllowsRegister();
return true;
@@ -1769,6 +1772,7 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
// continue parsing after copying the current constraint into
// the return string.
break;
+ case 'r':
case 'R':
// "^" hints llvm that this is a 2 letter constraint.
// "Constraint++" is used to promote the string iterator
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 1244022e1bba8..dc32b4d7ac7d0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5394,10 +5394,12 @@ X86:
- ``Z``: An immediate 32-bit unsigned integer.
- ``q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
``l`` integer register. On X86-32, this is the ``a``, ``b``, ``c``, and ``d``
- registers, and on X86-64, it is all of the integer registers.
+ registers, and on X86-64, it is all of the integer registers. When feature
+ `egpr` and `inline-asm-use-gpr32` are both on, they will be extended to EGPR.
- ``Q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
``h`` integer register. This is the ``a``, ``b``, ``c``, and ``d`` registers.
-- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register.
+- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register. When feature
+ `egpr` and `inline-asm-use-gpr32` are both on, they will be extended to EGPR.
- ``R``: An 8, 16, 32, or 64-bit "legacy" integer register -- one which has
existed since i386, and can be accessed without the REX prefix.
- ``f``: A 32, 64, or 80-bit '387 FPU stack pseudo-register.
@@ -5418,8 +5420,10 @@ X86:
operand will get allocated only to RAX -- if two 32-bit operands are needed,
you're better off splitting it yourself, before passing it to the asm
statement.
-- ``jR``: An 8, 16, 32, or 64-bit integer EGPR when EGPR feature is on.
- Otherwise, same as ``R``.
+- ``jr``: An 8, 16, 32, or 64-bit integer GPR. It won't be extended to EGPR
+ When feature `egpr` or `inline-asm-use-gpr32` is on.
+- ``jR``: An 8, 16, 32, or 64-bit integer EGPR when egpr is on. Otherwise, same
+ as ``r``.
XCore:
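To make the documented difference concrete, a short hedged C sketch (assuming the clang-side "jr"/"jR" handling from this patch; not part of the change):

/* "jr" pins the operand to a legacy GPR even when egpr and
 * inline-asm-use-gpr32 are enabled, which is useful for templates whose
 * instructions have no EGPR-capable encoding. "jR" opts the operand in to
 * r16-r31 whenever the egpr feature is present.
 */
void pick_registers(long a, long b) {
  __asm__ volatile("addq %0, %%rax" : : "jr"(a) : "rax"); /* legacy GPR  */
  __asm__ volatile("addq %0, %%rax" : : "jR"(b) : "rax"); /* may use r16 */
}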
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index fcaf665ff476f..628ff560017ed 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -347,7 +347,7 @@ def FeatureNF : SubtargetFeature<"nf", "HasNF", "true",
def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
"Support conditional faulting">;
def FeatureUseGPR32InInlineAsm
- : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "false",
+ : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
"Enable use of GPR32 in inline assembly for APX">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5981b67b08a95..a385db12f398a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57585,6 +57585,7 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
switch (Constraint[1]) {
default:
break;
+ case 'r':
case 'R':
return C_RegisterClass;
}
@@ -57673,6 +57674,7 @@ X86TargetLowering::getSingleConstraintMatchWeight(
switch (Constraint[1]) {
default:
return CW_Invalid;
+ case 'r':
case 'R':
if (CallOperandVal->getType()->isIntegerTy())
Wt = CW_SpecificReg;
@@ -58018,13 +58020,21 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR8RegClass
+ : &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR16RegClass
+ : &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR32RegClass
+ : &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR64RegClass
+ : &X86::GR64_NOREX2RegClass);
break;
}
[[fallthrough]];
@@ -58043,22 +58053,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
- ? &X86::GR8_NOREX2RegClass
- : &X86::GR8RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR8RegClass
+ : &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
- ? &X86::GR16_NOREX2RegClass
- : &X86::GR16RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR16RegClass
+ : &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
- ? &X86::GR32_NOREX2RegClass
- : &X86::GR32RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR32RegClass
+ : &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, Subtarget.useInlineAsmGPR32()
- ? &X86::GR64_NOREX2RegClass
- : &X86::GR64RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ ? &X86::GR64RegClass
+ : &X86::GR64_NOREX2RegClass);
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
@@ -58285,13 +58295,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (Constraint[1]) {
default:
break;
+ case 'r':
+ if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
+ if (VT != MVT::f80 && !VT.isVector())
+ return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
+ break;
case 'R':
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
- if (VT == MVT::i32 || VT == MVT::f32 ||
- (!VT.isVector() && !Subtarget.is64Bit()))
+ if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64RegClass);
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
index 805f1e68726ef..af7c37b923a46 100644
--- a/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
@@ -5,12 +5,12 @@
; ERR: error: inline assembly requires more registers than available
-define void @constraint_jR_test() nounwind "frame-pointer"="all" {
+define void @constraint_jR_test() nounwind {
; CHECK-LABEL: constraint_jR_test:
; CHECK: addq %r16, %rax
entry:
%reg = alloca i64, align 8
%0 = load i64, ptr %reg, align 8
- call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll
new file mode 100644
index 0000000000000..8e20bbd5c1c51
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
+
+; CHECK: addq %r8, %rax
+define void @constraint_jr_test() nounwind {
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-jr-err-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jr-err-constraint.ll
new file mode 100644
index 0000000000000..b0b1c7d0e594a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-jr-err-constraint.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr %s 2>&1 | FileCheck %s
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s
+
+; CHECK: error: inline assembly requires more registers than available
+define void @constraint_jr_test() nounwind {
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
deleted file mode 100644
index 54b55ef6e156c..0000000000000
--- a/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
-; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
-; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
-
-; ERR: error: inline assembly requires more registers than available
-
-define void @constraint_r_test() nounwind "frame-pointer"="all" {
-; CHECK-LABEL: constraint_r_test:
-; CHECK: addq %r16, %rax
-entry:
- %reg = alloca i64, align 8
- %0 = load i64, ptr %reg, align 8
- call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-r-l-q-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-r-l-q-constraint.ll
new file mode 100644
index 0000000000000..a51fff55b9665
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-r-l-q-constraint.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
+
+; ERR: error: inline assembly requires more registers than available
+define void @constraint_r_test() nounwind {
+; CHECK-LABEL: constraint_r_test:
+; CHECK: addq %r16, %rax
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
+
+; ERR: error: inline assembly requires more registers than available
+define void @constraint_l_test() nounwind {
+; CHECK-LABEL: constraint_l_test:
+; CHECK: addq %r16, %rax
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "l,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
+
+
+; ERR: error: inline assembly requires more registers than available
+define void @constraint_q_test() nounwind {
+; CHECK-LABEL: constraint_q_test:
+; CHECK: addq %r16, %rax
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "q,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}
>From fb6ba87b4f678df51d9e9e0807401f883bdbaced Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 29 May 2024 17:29:41 +0800
Subject: [PATCH 10/11] address and clang-format
---
llvm/docs/LangRef.rst | 6 ++--
llvm/lib/Target/X86/X86ISelLowering.cpp | 40 +++++++++++++++----------
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index dc32b4d7ac7d0..da7c5cb7ab763 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5420,10 +5420,10 @@ X86:
operand will get allocated only to RAX -- if two 32-bit operands are needed,
you're better off splitting it yourself, before passing it to the asm
statement.
-- ``jr``: An 8, 16, 32, or 64-bit integer GPR. It won't be extended to EGPR
+- ``jr``: An 8, 16, 32, or 64-bit integer gpr16. It won't be extended to gpr32
When feature `egpr` or `inline-asm-use-gpr32` is on.
-- ``jR``: An 8, 16, 32, or 64-bit integer EGPR when egpr is on. Otherwise, same
- as ``r``.
+- ``jR``: An 8, 16, 32, or 64-bit integer gpr32 when feature `egpr`` is on.
+ Otherwise, same as ``r``.
XCore:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a385db12f398a..e39e4a212b0e3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58020,21 +58020,25 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
- ? &X86::GR8RegClass
- : &X86::GR8_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
+ ? &X86::GR8RegClass
+ : &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
- ? &X86::GR16RegClass
- : &X86::GR16_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
+ ? &X86::GR16RegClass
+ : &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
- ? &X86::GR32RegClass
- : &X86::GR32_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
+ ? &X86::GR32RegClass
+ : &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
- ? &X86::GR64RegClass
- : &X86::GR64_NOREX2RegClass);
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
+ ? &X86::GR64RegClass
+ : &X86::GR64_NOREX2RegClass);
break;
}
[[fallthrough]];
@@ -58053,20 +58057,24 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
? &X86::GR8RegClass
: &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
? &X86::GR16RegClass
: &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
? &X86::GR32RegClass
: &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
- return std::make_pair(0U, Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32()
+ return std::make_pair(0U, Subtarget.hasEGPR() &&
+ Subtarget.useInlineAsmGPR32()
? &X86::GR64RegClass
: &X86::GR64_NOREX2RegClass);
case 'R': // LEGACY_REGS
>From 81f58b656ae13331d32542e7a4dab8d9cd83dc09 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 30 May 2024 11:15:33 +0800
Subject: [PATCH 11/11] address comments
---
llvm/docs/LangRef.rst | 6 +++---
llvm/test/CodeGen/X86/apx/asm-constraint.ll | 20 ++++++++++++-------
.../X86/apx/inline-asm-jR-constraint.ll | 1 +
.../X86/apx/inline-asm-jr-constraint.ll | 18 ++++++++++++++---
4 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index da7c5cb7ab763..464132b61622c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5395,11 +5395,11 @@ X86:
- ``q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
``l`` integer register. On X86-32, this is the ``a``, ``b``, ``c``, and ``d``
registers, and on X86-64, it is all of the integer registers. When feature
- `egpr` and `inline-asm-use-gpr32` are both on, they will be extended to EGPR.
+ `egpr` and `inline-asm-use-gpr32` are both on, it will be extended to gpr32.
- ``Q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
``h`` integer register. This is the ``a``, ``b``, ``c``, and ``d`` registers.
- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register. When feature
- `egpr` and `inline-asm-use-gpr32` are both on, they will be extended to EGPR.
+ `egpr` and `inline-asm-use-gpr32` are both on, it will be extended to gpr32.
- ``R``: An 8, 16, 32, or 64-bit "legacy" integer register -- one which has
existed since i386, and can be accessed without the REX prefix.
- ``f``: A 32, 64, or 80-bit '387 FPU stack pseudo-register.
@@ -5421,7 +5421,7 @@ X86:
you're better off splitting it yourself, before passing it to the asm
statement.
- ``jr``: An 8, 16, 32, or 64-bit integer gpr16. It won't be extended to gpr32
- When feature `egpr` or `inline-asm-use-gpr32` is on.
+ when feature `egpr` or `inline-asm-use-gpr32` is on.
- ``jR``: An 8, 16, 32, or 64-bit integer gpr32 when feature `egpr`` is on.
Otherwise, same as ``r``.
diff --git a/llvm/test/CodeGen/X86/apx/asm-constraint.ll b/llvm/test/CodeGen/X86/apx/asm-constraint.ll
index 9b81cbf29c25b..4eaa5d03c5825 100644
--- a/llvm/test/CodeGen/X86/apx/asm-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/asm-constraint.ll
@@ -1,21 +1,27 @@
; Check r16-r31 can not be used with 'q','r','l' constraint for backward compatibility.
-; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=+egpr 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=+egpr 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
define void @q() {
-; CHECK: error: inline assembly requires more registers than available
- %a = call i32 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+; ERR: error: inline assembly requires more registers than available
+; CHECK: movq %rax, %r16
+ %a = call i64 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret void
}
define void @r() {
-; CHECK: error: inline assembly requires more registers than available
- %a = call i32 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+; ERR: error: inline assembly requires more registers than available
+; CHECK: movq %rax, %r16
+ %a = call i64 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret void
}
define void @l() {
-; CHECK: error: inline assembly requires more registers than available
- %a = call i32 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+; ERR: error: inline assembly requires more registers than available
+; CHECK: movq %rax, %r16
+ %a = call i64 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret void
}
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
index af7c37b923a46..32b84915c6793 100644
--- a/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll
@@ -2,6 +2,7 @@
; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
+; RUN: not llc -mtriple=x86_64 -mattr=+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s --check-prefix=ERR
; ERR: error: inline assembly requires more registers than available
diff --git a/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll
index 8e20bbd5c1c51..80fabd4974322 100644
--- a/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll
+++ b/llvm/test/CodeGen/X86/apx/inline-asm-jr-constraint.ll
@@ -1,7 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
+; RUN: not llc -mtriple=x86_64 < %s | FileCheck %s
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
+; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr %s 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: not llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s --check-prefix=ERR
; CHECK: addq %r8, %rax
define void @constraint_jr_test() nounwind {
@@ -11,3 +14,12 @@ entry:
call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}
+
+; ERR: error: inline assembly requires more registers than available
+define void @constraint_jr_test_err() nounwind {
+entry:
+ %reg = alloca i64, align 8
+ %0 = load i64, ptr %reg, align 8
+ call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
+ ret void
+}