[llvm] [X86][APX] Try to replace NDD with NF instructions when optimizeCompareInstr (PR #130488)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 01:31:40 PDT 2025


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/130488

>From 3f0314d4266b305dcf5e2424a7dfe2d2b262b4ed Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sun, 9 Mar 2025 21:11:16 +0800
Subject: [PATCH 1/2] [X86][APX] Try to replace NDD with NF instructions when
 optimizeCompareInstr

https://godbolt.org/z/rWYdqnjjx
---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 15 +++++++++++++++
 llvm/test/CodeGen/X86/apx/cf.ll      | 28 +++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5fe7203c052d8..e3b4fe2cec9fb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5352,10 +5352,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   MachineInstr *MI = nullptr;
   MachineInstr *Sub = nullptr;
   MachineInstr *Movr0Inst = nullptr;
+  SmallVector<MachineInstr *, 4> NDDInsts;
   bool NoSignFlag = false;
   bool ClearsOverflowFlag = false;
   bool ShouldUpdateCC = false;
   bool IsSwapped = false;
+  bool HasCF = Subtarget.hasNF();
   unsigned OpNo = 0;
   X86::CondCode NewCC = X86::COND_INVALID;
   int64_t ImmDelta = 0;
@@ -5441,6 +5443,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
           continue;
         }
 
+        // Try to replace NDD with NF instructions.
+        if (HasCF && X86II::hasNewDataDest(Inst.getDesc().TSFlags) &&
+            Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+          NDDInsts.push_back(&Inst);
+          continue;
+        }
+
         // Cannot do anything for any other EFLAG changes.
         return false;
       }
@@ -5637,6 +5646,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
       return false;
   }
 
+  // Replace NDD with NF instructions.
+  for (MachineInstr *NDD : NDDInsts) {
+    NDD->setDesc(get(X86::getNFVariant(NDD->getOpcode())));
+    NDD->removeOperand(NDD->getNumOperands() - 1);
+  }
+
   // Make sure Sub instruction defines EFLAGS and mark the def live.
   MachineOperand *FlagDef =
       Sub->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index a64d7df11a4d0..fc170ca5f2b2e 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+nf,+ndd,+avx512f -verify-machineinstrs | FileCheck %s
 
 define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
 ; CHECK-LABEL: basic:
@@ -57,9 +57,8 @@ entry:
 define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
 ; CHECK-LABEL: reduced_data_dependency:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    subq %rsi, %rcx
-; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rdi, %rax
+; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rcx
 ; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    retq
 entry:
@@ -125,7 +124,7 @@ entry:
   ret void
 }
 
-define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) #2 {
+define void @single_cmp(i32 %a, i32 %b, ptr %c, ptr %d) {
 ; CHECK-LABEL: single_cmp:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl %esi, %edi
@@ -139,3 +138,22 @@ entry:
   tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %2, ptr %d, i32 2, <1 x i1> %1)
   ret void
 }
+
+define void @load_add_store(i32 %a, i32 %b, ptr %p) {
+; CHECK-LABEL: load_add_store:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    cfcmovnew (%rdx), %ax
+; CHECK-NEXT:    {nf} incw %ax
+; CHECK-NEXT:    cfcmovnew %ax, (%rdx)
+; CHECK-NEXT:    retq
+entry:
+  %0 = icmp ne i32 %a, %b
+  %1 = insertelement <1 x i1> poison, i1 %0, i64 0
+  %2 = tail call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %p, i32 2, <1 x i1> %1, <1 x i16> poison)
+  %3 = extractelement <1 x i16> %2, i64 0
+  %4 = add i16 %3, 1
+  %5 = insertelement <1 x i16> poison, i16 %4, i64 0
+  tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
+  ret void
+}

>From 527af4e956574f4d09f7756aacdc13f70d921839 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 10 Mar 2025 16:28:22 +0800
Subject: [PATCH 2/2] Address review comments

---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 19 +++++++++++--------
 llvm/test/CodeGen/X86/apx/cf.ll      |  9 +++++----
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index e3b4fe2cec9fb..9b124e3460258 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5352,12 +5352,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   MachineInstr *MI = nullptr;
   MachineInstr *Sub = nullptr;
   MachineInstr *Movr0Inst = nullptr;
-  SmallVector<MachineInstr *, 4> NDDInsts;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> InstsToUpdate;
   bool NoSignFlag = false;
   bool ClearsOverflowFlag = false;
   bool ShouldUpdateCC = false;
   bool IsSwapped = false;
-  bool HasCF = Subtarget.hasNF();
+  bool HasNF = Subtarget.hasNF();
   unsigned OpNo = 0;
   X86::CondCode NewCC = X86::COND_INVALID;
   int64_t ImmDelta = 0;
@@ -5444,9 +5444,12 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
         }
 
         // Try to replace NDD with NF instructions.
-        if (HasCF && X86II::hasNewDataDest(Inst.getDesc().TSFlags) &&
-            Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
-          NDDInsts.push_back(&Inst);
+        if (HasNF && Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+          unsigned NewOp = X86::getNFVariant(Inst.getOpcode());
+          if (!NewOp)
+            return false;
+
+          InstsToUpdate.push_back(std::make_pair(&Inst, NewOp));
           continue;
         }
 
@@ -5647,9 +5650,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
   }
 
   // Replace NDD with NF instructions.
-  for (MachineInstr *NDD : NDDInsts) {
-    NDD->setDesc(get(X86::getNFVariant(NDD->getOpcode())));
-    NDD->removeOperand(NDD->getNumOperands() - 1);
+  for (auto &Inst : InstsToUpdate) {
+    Inst.first->setDesc(get(Inst.second));
+    Inst.first->removeOperand(Inst.first->getNumOperands() - 1);
   }
 
   // Make sure Sub instruction defines EFLAGS and mark the def live.
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index fc170ca5f2b2e..8d104e5f3ced2 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+nf,+ndd,+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+nf,+avx512f -verify-machineinstrs | FileCheck %s
 
 define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
 ; CHECK-LABEL: basic:
@@ -57,8 +57,9 @@ entry:
 define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
 ; CHECK-LABEL: reduced_data_dependency:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    subq %rsi, %rdi, %rax
-; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rcx
+; CHECK-NEXT:    movq %rdi, %rcx
+; CHECK-NEXT:    subq %rsi, %rcx
+; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rax
 ; CHECK-NEXT:    addq %rcx, %rax
 ; CHECK-NEXT:    retq
 entry:
@@ -144,7 +145,7 @@ define void @load_add_store(i32 %a, i32 %b, ptr %p) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    cmpl %esi, %edi
 ; CHECK-NEXT:    cfcmovnew (%rdx), %ax
-; CHECK-NEXT:    {nf} incw %ax
+; CHECK-NEXT:    {nf} incl %eax
 ; CHECK-NEXT:    cfcmovnew %ax, (%rdx)
 ; CHECK-NEXT:    retq
 entry:



More information about the llvm-commits mailing list