[llvm] [AArch64] Convert `CSEL(X, 1)` into `CSINC(X, XZR)` in early-ifcvt (PR #162993)
Csanád Hajdú via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 01:30:56 PDT 2025
https://github.com/Il-Capitano updated https://github.com/llvm/llvm-project/pull/162993
>From 4518b6908faf2d09c4799dd844fec109b96c2375 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Csan=C3=A1d=20Hajd=C3=BA?= <csanad.hajdu at arm.com>
Date: Fri, 10 Oct 2025 17:37:58 +0200
Subject: [PATCH 1/3] [AArch64] Convert `CSEL(X, 1)` into `CSINC(X, XZR)` in
early-ifcvt
Early if conversion can create instruction sequences such as
```
mov x1, #1
csel x0, x1, x2, eq
```
which could be simplified into the following instead
```
csinc x0, x2, xzr, ne
```
One notable example that generates code like this is `cmpxchg weak`.
This is fixed by treating an immediate value of 1 as `add(wzr, 1)` (or
`add(xzr, 1)` for 64-bit values), so that the addition can be folded into
the CSEL by using CSINC instead.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 44 ++++++++--
.../test/CodeGen/AArch64/arm64-early-ifcvt.ll | 80 +++++++++++++++++++
llvm/test/CodeGen/AArch64/peephole-csel.ll | 5 +-
3 files changed, 120 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 12c600f0f2661..1d10ae4628fc8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -708,8 +708,32 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned Opc = 0;
- unsigned SrcOpNum = 0;
+ unsigned SrcReg = 0;
switch (DefMI->getOpcode()) {
+ case AArch64::SUBREG_TO_REG:
+ // Check for the following way to define a 64-bit immediate:
+ // %0:gpr32 = MOVi32imm 1
+ // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+ return 0;
+ if (!DefMI->getOperand(2).isReg())
+ return 0;
+ if (!DefMI->getOperand(3).isImm() ||
+ DefMI->getOperand(3).getImm() != AArch64::sub_32)
+ return 0;
+ DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+ if (DefMI->getOpcode() != AArch64::MOVi32imm)
+ return 0;
+ // fall-through to MOVi32imm case.
+ [[fallthrough]];
+ case AArch64::MOVi32imm:
+ case AArch64::MOVi64imm:
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+ break;
+
case AArch64::ADDSXri:
case AArch64::ADDSWri:
// if NZCV is used, do not fold.
@@ -724,7 +748,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
DefMI->getOperand(3).getImm() != 0)
return 0;
- SrcOpNum = 1;
+ SrcReg = DefMI->getOperand(1).getReg();
Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
break;
@@ -734,7 +758,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
break;
}
@@ -753,17 +777,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
- SrcOpNum = 2;
+ SrcReg = DefMI->getOperand(2).getReg();
Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
break;
}
default:
return 0;
}
- assert(Opc && SrcOpNum && "Missing parameters");
+ assert(Opc && SrcReg && "Missing parameters");
if (NewVReg)
- *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+ *NewVReg = SrcReg;
return Opc;
}
@@ -976,6 +1000,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
+ // NewVReg might be XZR/WZR. In that case create a COPY into a virtual
+ // register.
+ if (!Register::isVirtualRegister(NewVReg)) {
+ unsigned ZeroReg = NewVReg;
+ NewVReg = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewVReg).addReg(ZeroReg);
+ }
+
FalseReg = NewVReg;
Opc = FoldedOpc;
// The extends the live range of NewVReg.
diff --git a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 97a7741bcde75..849323f0fedf3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -421,3 +421,83 @@ for.body51: ; preds = %is_sbox.exit155
unreachable
}
declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp
+
+; CHECK-LABEL: fold_imm1_csinc_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csinc w0, w2, wzr, ge
+; CHECK-NEXT: ret
+define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp {
+entry:
+ %cmp = icmp slt i32 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i32 [ 1, %if.then ], [ %n, %if.else ]
+ ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_csinc_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinc x0, x2, xzr, ge
+; CHECK-NEXT: ret
+define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp {
+entry:
+ %cmp = icmp slt i64 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i64 [ 1, %if.then ], [ %n, %if.else ]
+ ret i64 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp {
+entry:
+ %cmp = icmp slt i32 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i32 [ 1, %if.then ], [ 0, %if.else ]
+ ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset x0, lt
+; CHECK-NEXT: ret
+define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp {
+entry:
+ %cmp = icmp slt i64 %x, %y
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ br label %exit
+
+if.else:
+ br label %exit
+
+exit:
+ %result = phi i64 [ 1, %if.then ], [ 0, %if.else ]
+ ret i64 %result
+}
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 868b9f1f2f6ac..b085258059b7e 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
; CHECK-LABEL: peephole_csel:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: mov x9, xzr
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: tst w1, #0x1
-; CHECK-NEXT: csel x8, x8, x9, eq
+; CHECK-NEXT: csinc x8, x8, xzr, ne
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: ret
entry:
>From 311dcc91394db153909d934f5fc834d4ef6755d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Csan=C3=A1d=20Hajd=C3=BA?= <csanad.hajdu at arm.com>
Date: Wed, 15 Oct 2025 21:50:13 +0200
Subject: [PATCH 2/3] Address review feedback
* Remove fallthrough
* Rename NewVReg -> NewReg
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 33 +++++++++++---------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1d10ae4628fc8..777df137b9d6a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -700,7 +700,7 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
// csel instruction. If so, return the folded opcode, and the replacement
// register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
- unsigned *NewVReg = nullptr) {
+ unsigned *NewReg = nullptr) {
VReg = removeCopies(MRI, VReg);
if (!Register::isVirtualRegister(VReg))
return 0;
@@ -724,8 +724,13 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
if (DefMI->getOpcode() != AArch64::MOVi32imm)
return 0;
- // fall-through to MOVi32imm case.
- [[fallthrough]];
+ if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+ return 0;
+ assert(Is64Bit);
+ SrcReg = AArch64::XZR;
+ Opc = AArch64::CSINCXr;
+ break;
+
case AArch64::MOVi32imm:
case AArch64::MOVi64imm:
if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
@@ -786,8 +791,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
}
assert(Opc && SrcReg && "Missing parameters");
- if (NewVReg)
- *NewVReg = SrcReg;
+ if (NewReg)
+ *NewReg = SrcReg;
return Opc;
}
@@ -988,30 +993,30 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
// Try folding simple instructions into the csel.
if (TryFold) {
- unsigned NewVReg = 0;
- unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
+ unsigned NewReg = 0;
+ unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
if (FoldedOpc) {
// The folded opcodes csinc, csinc and csneg apply the operation to
// FalseReg, so we need to invert the condition.
CC = AArch64CC::getInvertedCondCode(CC);
TrueReg = FalseReg;
} else
- FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
+ FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
// NewVReg might be XZR/WZR. In that case create a COPY into a virtual
// register.
- if (!Register::isVirtualRegister(NewVReg)) {
- unsigned ZeroReg = NewVReg;
- NewVReg = MRI.createVirtualRegister(RC);
- BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewVReg).addReg(ZeroReg);
+ if (!Register::isVirtualRegister(NewReg)) {
+ unsigned ZeroReg = NewReg;
+ NewReg = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewReg).addReg(ZeroReg);
}
- FalseReg = NewVReg;
+ FalseReg = NewReg;
Opc = FoldedOpc;
// The extends the live range of NewVReg.
- MRI.clearKillFlags(NewVReg);
+ MRI.clearKillFlags(NewReg);
}
}
>From 51f0dc6d27586efc55a2ab907c9eb7c9e06ab2f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Csan=C3=A1d=20Hajd=C3=BA?= <csanad.hajdu at arm.com>
Date: Fri, 17 Oct 2025 15:35:03 +0200
Subject: [PATCH 3/3] Address review feedback
* NewVReg -> NewReg in comments.
* Use WZR/XZR directly in the folded instruction.
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 777df137b9d6a..5d13691a328b9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1005,24 +1005,22 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
- // NewVReg might be XZR/WZR. In that case create a COPY into a virtual
- // register.
- if (!Register::isVirtualRegister(NewReg)) {
- unsigned ZeroReg = NewReg;
- NewReg = MRI.createVirtualRegister(RC);
- BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewReg).addReg(ZeroReg);
- }
-
FalseReg = NewReg;
Opc = FoldedOpc;
- // The extends the live range of NewVReg.
+ // Extend the live range of NewReg.
MRI.clearKillFlags(NewReg);
}
}
// Pull all virtual register into the appropriate class.
MRI.constrainRegClass(TrueReg, RC);
- MRI.constrainRegClass(FalseReg, RC);
+ // FalseReg might be WZR or XZR if the folded operand is a literal 1.
+ assert(
+ (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
+ FalseReg == AArch64::XZR) &&
+ "FalseReg was folded into a non-virtual register other than WZR or XZR");
+ if (FalseReg.isVirtual())
+ MRI.constrainRegClass(FalseReg, RC);
// Insert the csel.
BuildMI(MBB, I, DL, get(Opc), DstReg)
More information about the llvm-commits
mailing list