[llvm] [X86][APX] Add NF instructions to convertToThreeAddress functions (PR #130969)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 07:31:49 PDT 2025
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/130969
Since #130488, NF instructions can be encountered when converting to three-address instructions, so the convertToThreeAddress functions need to handle them as well.
From 63398ce6f0c88437a675e507a7083e77245848ff Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 12 Mar 2025 22:27:01 +0800
Subject: [PATCH] [X86][APX] Add NF instructions to convertToThreeAddress functions
Since #130488, NF instructions can be encountered when converting to three-address instructions, so the convertToThreeAddress functions need to handle them as well.
---
llvm/lib/Target/X86/X86InstrInfo.cpp | 77 +++---
llvm/test/CodeGen/X86/apx/nf-regressions.ll | 274 ++++++++++++++++++++
2 files changed, 315 insertions(+), 36 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/nf-regressions.ll
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 24002cbbcab57..55a19d97464cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1283,11 +1283,14 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
+#define CASE_NF(OP) \
+ case X86::OP: \
+ case X86::OP##_NF:
switch (MIOpc) {
default:
llvm_unreachable("Unreachable!");
- case X86::SHL8ri:
- case X86::SHL16ri: {
+ CASE_NF(SHL8ri)
+ CASE_NF(SHL16ri) {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0)
.addImm(1LL << ShAmt)
@@ -1296,23 +1299,23 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
.addReg(0);
break;
}
- case X86::INC8r:
- case X86::INC16r:
+ CASE_NF(INC8r)
+ CASE_NF(INC16r)
addRegOffset(MIB, InRegLEA, true, 1);
break;
- case X86::DEC8r:
- case X86::DEC16r:
+ CASE_NF(DEC8r)
+ CASE_NF(DEC16r)
addRegOffset(MIB, InRegLEA, true, -1);
break;
- case X86::ADD8ri:
+ CASE_NF(ADD8ri)
+ CASE_NF(ADD16ri)
case X86::ADD8ri_DB:
- case X86::ADD16ri:
case X86::ADD16ri_DB:
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
- case X86::ADD8rr:
+ CASE_NF(ADD8rr)
+ CASE_NF(ADD16rr)
case X86::ADD8rr_DB:
- case X86::ADD16rr:
case X86::ADD16rr_DB: {
Src2 = MI.getOperand(2).getReg();
Src2SubReg = MI.getOperand(2).getSubReg();
@@ -1449,7 +1452,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MIOpc) {
default:
llvm_unreachable("Unreachable!");
- case X86::SHL64ri: {
+ CASE_NF(SHL64ri) {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
@@ -1469,7 +1472,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
.addReg(0);
break;
}
- case X86::SHL32ri: {
+ CASE_NF(SHL32ri) {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
@@ -1501,20 +1504,20 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
- case X86::SHL8ri:
+ CASE_NF(SHL8ri)
Is8BitOp = true;
[[fallthrough]];
- case X86::SHL16ri: {
+ CASE_NF(SHL16ri) {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
return nullptr;
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
}
- case X86::INC64r:
- case X86::INC32r: {
+ CASE_NF(INC64r)
+ CASE_NF(INC32r) {
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- unsigned Opc = MIOpc == X86::INC64r
+ unsigned Opc = (MIOpc == X86::INC64r || MIOpc == X86::INC64r_NF)
? X86::LEA64r
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
@@ -1536,10 +1539,10 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
- case X86::DEC64r:
- case X86::DEC32r: {
+ CASE_NF(DEC64r)
+ CASE_NF(DEC32r) {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- unsigned Opc = MIOpc == X86::DEC64r
+ unsigned Opc = (MIOpc == X86::DEC64r || MIOpc == X86::DEC64r_NF)
? X86::LEA64r
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
@@ -1562,20 +1565,21 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
- case X86::DEC8r:
- case X86::INC8r:
+ CASE_NF(DEC8r)
+ CASE_NF(INC8r)
Is8BitOp = true;
[[fallthrough]];
- case X86::DEC16r:
- case X86::INC16r:
+ CASE_NF(DEC16r)
+ CASE_NF(INC16r)
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- case X86::ADD64rr:
+ CASE_NF(ADD64rr)
+ CASE_NF(ADD32rr)
case X86::ADD64rr_DB:
- case X86::ADD32rr:
case X86::ADD32rr_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
- if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
+ if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_NF ||
+ MIOpc == X86::ADD64rr_DB)
Opc = X86::LEA64r;
else
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
@@ -1620,21 +1624,21 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
NumRegOperands = 3;
break;
}
- case X86::ADD8rr:
+ CASE_NF(ADD8rr)
case X86::ADD8rr_DB:
Is8BitOp = true;
[[fallthrough]];
- case X86::ADD16rr:
+ CASE_NF(ADD16rr)
case X86::ADD16rr_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- case X86::ADD64ri32:
+ CASE_NF(ADD64ri32)
case X86::ADD64ri32_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(
BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
MI.getOperand(2));
break;
- case X86::ADD32ri:
+ CASE_NF(ADD32ri)
case X86::ADD32ri_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
@@ -1659,18 +1663,18 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
break;
}
- case X86::ADD8ri:
+ CASE_NF(ADD8ri)
case X86::ADD8ri_DB:
Is8BitOp = true;
[[fallthrough]];
- case X86::ADD16ri:
+ CASE_NF(ADD16ri)
case X86::ADD16ri_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
- case X86::SUB8ri:
+ CASE_NF(SUB8ri)
case X86::SUB16ri:
/// FIXME: Support these similar to ADD8ri/ADD16ri*.
return nullptr;
- case X86::SUB32ri: {
+ CASE_NF(SUB32ri) {
if (!MI.getOperand(2).isImm())
return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
@@ -1701,7 +1705,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
break;
}
- case X86::SUB64ri32: {
+ CASE_NF(SUB64ri32) {
if (!MI.getOperand(2).isImm())
return nullptr;
int64_t Imm = MI.getOperand(2).getImm();
@@ -2034,6 +2038,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
break;
}
}
+#undef CASE_NF
if (!NewMI)
return nullptr;
diff --git a/llvm/test/CodeGen/X86/apx/nf-regressions.ll b/llvm/test/CodeGen/X86/apx/nf-regressions.ll
new file mode 100644
index 0000000000000..846a2f91c584e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/nf-regressions.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 -mattr=+nf -verify-machineinstrs | FileCheck %s
+
+define void @convertToThreeAddress(ptr %arg, ptr %arg1) {
+; CHECK-LABEL: convertToThreeAddress:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: movslq (%rdi), %rax
+; CHECK-NEXT: movslq (%rsi), %rcx
+; CHECK-NEXT: subq %rax, %rcx
+; CHECK-NEXT: leaq 1(%rcx), %rax
+; CHECK-NEXT: js .LBB0_3
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb10
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpq $1, %rax
+; CHECK-NEXT: jg .LBB0_1
+; CHECK-NEXT: # %bb.2: # %bb9
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_3: # %bb16
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: jne .LBB0_15
+; CHECK-NEXT: # %bb.4: # %bb16
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_21
+; CHECK-NEXT: # %bb.5: # %bb17
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_21
+; CHECK-NEXT: # %bb.6: # %bb18
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_11
+; CHECK-NEXT: # %bb.7: # %bb19
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_8
+; CHECK-NEXT: .LBB0_10: # %bb24
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_11
+; CHECK-NEXT: jmp .LBB0_12
+; CHECK-NEXT: .LBB0_15: # %bb27
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: jne .LBB0_17
+; CHECK-NEXT: # %bb.16: # %bb28
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: .LBB0_17: # %bb37
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: jne .LBB0_21
+; CHECK-NEXT: # %bb.18: # %bb38
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: js .LBB0_21
+; CHECK-NEXT: # %bb.19: # %bb40
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: jne .LBB0_21
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_20: # %bb41
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpq $1, %rax
+; CHECK-NEXT: jg .LBB0_20
+; CHECK-NEXT: jmp .LBB0_21
+; CHECK-NEXT: .LBB0_8: # %bb20
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_12
+; CHECK-NEXT: # %bb.9: # %bb23
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_10
+; CHECK-NEXT: .LBB0_11: # %bb61.preheader
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_21
+; CHECK-NEXT: .LBB0_12: # %bb53
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_21
+; CHECK-NEXT: # %bb.13: # %bb54
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_14
+; CHECK-NEXT: .LBB0_21: # %bb63
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_14: # %bb57
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: retq
+bb:
+ %i = load i32, ptr %arg, align 4
+ %i2 = sext i32 %i to i64
+ %i3 = load i32, ptr %arg1, align 4
+ %i4 = sext i32 %i3 to i64
+ %i5 = sub nsw i64 %i4, %i2
+ %i6 = add nsw i64 %i5, 1
+ %i7 = icmp sgt i64 %i5, -1
+ br i1 %i7, label %bb8, label %bb16
+
+bb8: ; preds = %bb
+ br label %bb10
+
+bb9: ; preds = %bb13
+ br label %bb15
+
+bb10: ; preds = %bb13, %bb8
+ %i11 = phi i64 [ %i6, %bb8 ], [ poison, %bb13 ]
+ br i1 poison, label %bb12, label %bb13
+
+bb12: ; preds = %bb10
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb10
+ %i14 = icmp sgt i64 %i11, 1
+ br i1 %i14, label %bb10, label %bb9
+
+bb15: ; preds = %bb9
+ ret void
+
+bb16: ; preds = %bb
+ switch i32 poison, label %bb63 [
+ i32 1, label %bb27
+ i32 2, label %bb17
+ ]
+
+bb17: ; preds = %bb16
+ br i1 poison, label %bb18, label %bb63
+
+bb18: ; preds = %bb17
+ br i1 poison, label %bb61, label %bb19
+
+bb19: ; preds = %bb18
+ br i1 poison, label %bb24, label %bb20
+
+bb20: ; preds = %bb19
+ br label %bb21
+
+bb21: ; preds = %bb21, %bb20
+ br i1 poison, label %bb22, label %bb21
+
+bb22: ; preds = %bb21
+ br i1 poison, label %bb50, label %bb23
+
+bb23: ; preds = %bb22
+ br i1 poison, label %bb61, label %bb24
+
+bb24: ; preds = %bb23, %bb19
+ br label %bb25
+
+bb25: ; preds = %bb25, %bb24
+ br i1 poison, label %bb26, label %bb25
+
+bb26: ; preds = %bb25
+ br i1 poison, label %bb50, label %bb61
+
+bb27: ; preds = %bb16
+ br i1 poison, label %bb28, label %bb37
+
+bb28: ; preds = %bb27
+ br label %bb29
+
+bb29: ; preds = %bb32, %bb28
+ br i1 poison, label %bb32, label %bb30
+
+bb30: ; preds = %bb29
+ br label %bb31
+
+bb31: ; preds = %bb31, %bb30
+ br i1 poison, label %bb31, label %bb32
+
+bb32: ; preds = %bb31, %bb29
+ br i1 poison, label %bb29, label %bb33
+
+bb33: ; preds = %bb36, %bb32
+ br i1 poison, label %bb36, label %bb34
+
+bb34: ; preds = %bb33
+ br label %bb35
+
+bb35: ; preds = %bb35, %bb34
+ br i1 poison, label %bb35, label %bb36
+
+bb36: ; preds = %bb35, %bb33
+ br i1 poison, label %bb33, label %bb37
+
+bb37: ; preds = %bb36, %bb27
+ br i1 poison, label %bb38, label %bb63
+
+bb38: ; preds = %bb37
+ br label %bb39
+
+bb39: ; preds = %bb49, %bb38
+ br i1 %i7, label %bb40, label %bb49
+
+bb40: ; preds = %bb39
+ br i1 poison, label %bb41, label %bb49
+
+bb41: ; preds = %bb47, %bb40
+ %i42 = phi i64 [ poison, %bb47 ], [ %i6, %bb40 ]
+ br label %bb43
+
+bb43: ; preds = %bb46, %bb41
+ br i1 poison, label %bb45, label %bb44
+
+bb44: ; preds = %bb43
+ br label %bb46
+
+bb45: ; preds = %bb43
+ br label %bb46
+
+bb46: ; preds = %bb45, %bb44
+ br i1 poison, label %bb43, label %bb47
+
+bb47: ; preds = %bb46
+ %i48 = icmp sgt i64 %i42, 1
+ br i1 %i48, label %bb41, label %bb49
+
+bb49: ; preds = %bb47, %bb40, %bb39
+ br i1 poison, label %bb39, label %bb63
+
+bb50: ; preds = %bb26, %bb22
+ br label %bb53
+
+bb51: ; preds = %bb61
+ br i1 poison, label %bb52, label %bb53
+
+bb52: ; preds = %bb60, %bb57, %bb51
+ br label %bb62
+
+bb53: ; preds = %bb51, %bb50
+ br i1 poison, label %bb58, label %bb54
+
+bb54: ; preds = %bb53
+ br label %bb55
+
+bb55: ; preds = %bb55, %bb54
+ br i1 poison, label %bb56, label %bb55
+
+bb56: ; preds = %bb55
+ br i1 poison, label %bb63, label %bb57
+
+bb57: ; preds = %bb56
+ br i1 poison, label %bb52, label %bb58
+
+bb58: ; preds = %bb57, %bb53
+ br label %bb59
+
+bb59: ; preds = %bb59, %bb58
+ br i1 poison, label %bb60, label %bb59
+
+bb60: ; preds = %bb59
+ br i1 poison, label %bb63, label %bb52
+
+bb61: ; preds = %bb61, %bb26, %bb23, %bb18
+ br i1 poison, label %bb61, label %bb51
+
+bb62: ; preds = %bb62, %bb52
+ br i1 poison, label %bb62, label %bb63
+
+bb63: ; preds = %bb62, %bb60, %bb56, %bb49, %bb37, %bb17, %bb16
+ br i1 poison, label %bb65, label %bb64
+
+bb64: ; preds = %bb63
+ br label %bb65
+
+bb65: ; preds = %bb64, %bb63
+ ret void
+}
More information about the llvm-commits
mailing list