[llvm] [ARM] Copy SMAX(lhs, 0) and SMIN(lhs, 0) patterns from AArch64 to ARM (PR #146565)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 1 10:29:34 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/146565
>From 32bd2dafea7f8ab215c1823bc8b270b681c9f0fb Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Tue, 1 Jul 2025 12:11:39 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/ARM/min-max-combine.ll | 168 +++++++++++++++++++++++
1 file changed, 168 insertions(+)
create mode 100644 llvm/test/CodeGen/ARM/min-max-combine.ll
diff --git a/llvm/test/CodeGen/ARM/min-max-combine.ll b/llvm/test/CodeGen/ARM/min-max-combine.ll
new file mode 100644
index 0000000000000..a171d06eec32d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/min-max-combine.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv7a < %s | FileCheck %s --check-prefix=ARM
+; RUN: llc -mtriple=armv6m < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -mtriple=armv7m < %s | FileCheck %s --check-prefix=THUMB2
+
+declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone
+
+define i8 @smaxi8_zero(i8 %a) {
+; ARM-LABEL: smaxi8_zero:
+; ARM: @ %bb.0:
+; ARM-NEXT: sxtb r0, r0
+; ARM-NEXT: bic r0, r0, r0, asr #31
+; ARM-NEXT: bx lr
+;
+; THUMB-LABEL: smaxi8_zero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: sxtb r0, r0
+; THUMB-NEXT: asrs r1, r0, #31
+; THUMB-NEXT: bics r0, r1
+; THUMB-NEXT: bx lr
+;
+; THUMB2-LABEL: smaxi8_zero:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: sxtb r0, r0
+; THUMB2-NEXT: bic.w r0, r0, r0, asr #31
+; THUMB2-NEXT: bx lr
+ %c = call i8 @llvm.smax.i8(i8 %a, i8 0)
+ ret i8 %c
+}
+
+declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone
+
+define i16 @smaxi16_zero(i16 %a) {
+; ARM-LABEL: smaxi16_zero:
+; ARM: @ %bb.0:
+; ARM-NEXT: sxth r0, r0
+; ARM-NEXT: bic r0, r0, r0, asr #31
+; ARM-NEXT: bx lr
+;
+; THUMB-LABEL: smaxi16_zero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: sxth r0, r0
+; THUMB-NEXT: asrs r1, r0, #31
+; THUMB-NEXT: bics r0, r1
+; THUMB-NEXT: bx lr
+;
+; THUMB2-LABEL: smaxi16_zero:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: sxth r0, r0
+; THUMB2-NEXT: bic.w r0, r0, r0, asr #31
+; THUMB2-NEXT: bx lr
+ %c = call i16 @llvm.smax.i16(i16 %a, i16 0)
+ ret i16 %c
+}
+
+declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone
+
+define i32 @smaxi32_zero(i32 %a) {
+; ARM-LABEL: smaxi32_zero:
+; ARM: @ %bb.0:
+; ARM-NEXT: bic r0, r0, r0, asr #31
+; ARM-NEXT: bx lr
+;
+; THUMB-LABEL: smaxi32_zero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: asrs r1, r0, #31
+; THUMB-NEXT: bics r0, r1
+; THUMB-NEXT: bx lr
+;
+; THUMB2-LABEL: smaxi32_zero:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: bic.w r0, r0, r0, asr #31
+; THUMB2-NEXT: bx lr
+ %c = call i32 @llvm.smax.i32(i32 %a, i32 0)
+ ret i32 %c
+}
+
+; SMIN
+
+declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone
+
+define i8 @smini8_zero(i8 %a) {
+; ARM-LABEL: smini8_zero:
+; ARM: @ %bb.0:
+; ARM-NEXT: sxtb r0, r0
+; ARM-NEXT: cmp r0, #0
+; ARM-NEXT: movpl r0, #0
+; ARM-NEXT: bx lr
+;
+; THUMB-LABEL: smini8_zero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: sxtb r0, r0
+; THUMB-NEXT: cmp r0, #0
+; THUMB-NEXT: bmi .LBB3_2
+; THUMB-NEXT: @ %bb.1:
+; THUMB-NEXT: movs r0, #0
+; THUMB-NEXT: .LBB3_2:
+; THUMB-NEXT: bx lr
+;
+; THUMB2-LABEL: smini8_zero:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: sxtb r0, r0
+; THUMB2-NEXT: cmp r0, #0
+; THUMB2-NEXT: it pl
+; THUMB2-NEXT: movpl r0, #0
+; THUMB2-NEXT: bx lr
+ %c = call i8 @llvm.smin.i8(i8 %a, i8 0)
+ ret i8 %c
+}
+
+declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone
+
+define i16 @smini16_zero(i16 %a) {
+; ARM-LABEL: smini16_zero:
+; ARM: @ %bb.0:
+; ARM-NEXT: sxth r0, r0
+; ARM-NEXT: cmp r0, #0
+; ARM-NEXT: movpl r0, #0
+; ARM-NEXT: bx lr
+;
+; THUMB-LABEL: smini16_zero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: sxth r0, r0
+; THUMB-NEXT: cmp r0, #0
+; THUMB-NEXT: bmi .LBB4_2
+; THUMB-NEXT: @ %bb.1:
+; THUMB-NEXT: movs r0, #0
+; THUMB-NEXT: .LBB4_2:
+; THUMB-NEXT: bx lr
+;
+; THUMB2-LABEL: smini16_zero:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: sxth r0, r0
+; THUMB2-NEXT: cmp r0, #0
+; THUMB2-NEXT: it pl
+; THUMB2-NEXT: movpl r0, #0
+; THUMB2-NEXT: bx lr
+ %c = call i16 @llvm.smin.i16(i16 %a, i16 0)
+ ret i16 %c
+}
+
+declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone
+
+define i32 @smini32_zero(i32 %a) {
+; ARM-LABEL: smini32_zero:
+; ARM: @ %bb.0:
+; ARM-NEXT: cmp r0, #0
+; ARM-NEXT: movpl r0, #0
+; ARM-NEXT: bx lr
+;
+; THUMB-LABEL: smini32_zero:
+; THUMB: @ %bb.0:
+; THUMB-NEXT: cmp r0, #0
+; THUMB-NEXT: bmi .LBB5_2
+; THUMB-NEXT: @ %bb.1:
+; THUMB-NEXT: movs r0, #0
+; THUMB-NEXT: .LBB5_2:
+; THUMB-NEXT: bx lr
+;
+; THUMB2-LABEL: smini32_zero:
+; THUMB2: @ %bb.0:
+; THUMB2-NEXT: cmp r0, #0
+; THUMB2-NEXT: it pl
+; THUMB2-NEXT: movpl r0, #0
+; THUMB2-NEXT: bx lr
+ %c = call i32 @llvm.smin.i32(i32 %a, i32 0)
+ ret i32 %c
+}
>From 4f863595bbebd2797b57af7f8f5d0a218868c2a7 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Tue, 1 Jul 2025 12:18:35 -0400
Subject: [PATCH 2/2] [ARM] Copy SMAX(lhs, 0) and SMIN(lhs, 0) patterns from
AArch64 to ARM
They work on ARM too.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 20 ++++++++
llvm/test/CodeGen/ARM/min-max-combine.ll | 46 ++++++-------------
.../predicated-liveout-unknown-lanes.ll | 3 +-
.../Thumb2/LowOverheadLoops/unpredload.ll | 1 -
4 files changed, 35 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2f89e23993385..c520bfa5d2468 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -5510,6 +5510,26 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue FalseVal = Op.getOperand(3);
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
+ ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if (Op.getValueType().isInteger()) {
+ // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
+ // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
+ // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
+ // Both require fewer instructions than compare and conditional select.
+ if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC &&
+ RHSC->isZero() && CFVal && CFVal->isZero() &&
+ LHS.getValueType() == RHS.getValueType()) {
+ EVT VT = LHS.getValueType();
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(dl, Shift, VT);
+
+ return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
+ }
+ }
if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
diff --git a/llvm/test/CodeGen/ARM/min-max-combine.ll b/llvm/test/CodeGen/ARM/min-max-combine.ll
index a171d06eec32d..b9a7690009337 100644
--- a/llvm/test/CodeGen/ARM/min-max-combine.ll
+++ b/llvm/test/CodeGen/ARM/min-max-combine.ll
@@ -83,26 +83,20 @@ define i8 @smini8_zero(i8 %a) {
; ARM-LABEL: smini8_zero:
; ARM: @ %bb.0:
; ARM-NEXT: sxtb r0, r0
-; ARM-NEXT: cmp r0, #0
-; ARM-NEXT: movpl r0, #0
+; ARM-NEXT: and r0, r0, r0, asr #31
; ARM-NEXT: bx lr
;
; THUMB-LABEL: smini8_zero:
; THUMB: @ %bb.0:
-; THUMB-NEXT: sxtb r0, r0
-; THUMB-NEXT: cmp r0, #0
-; THUMB-NEXT: bmi .LBB3_2
-; THUMB-NEXT: @ %bb.1:
-; THUMB-NEXT: movs r0, #0
-; THUMB-NEXT: .LBB3_2:
+; THUMB-NEXT: sxtb r1, r0
+; THUMB-NEXT: asrs r0, r1, #31
+; THUMB-NEXT: ands r0, r1
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: smini8_zero:
; THUMB2: @ %bb.0:
; THUMB2-NEXT: sxtb r0, r0
-; THUMB2-NEXT: cmp r0, #0
-; THUMB2-NEXT: it pl
-; THUMB2-NEXT: movpl r0, #0
+; THUMB2-NEXT: and.w r0, r0, r0, asr #31
; THUMB2-NEXT: bx lr
%c = call i8 @llvm.smin.i8(i8 %a, i8 0)
ret i8 %c
@@ -114,26 +108,20 @@ define i16 @smini16_zero(i16 %a) {
; ARM-LABEL: smini16_zero:
; ARM: @ %bb.0:
; ARM-NEXT: sxth r0, r0
-; ARM-NEXT: cmp r0, #0
-; ARM-NEXT: movpl r0, #0
+; ARM-NEXT: and r0, r0, r0, asr #31
; ARM-NEXT: bx lr
;
; THUMB-LABEL: smini16_zero:
; THUMB: @ %bb.0:
-; THUMB-NEXT: sxth r0, r0
-; THUMB-NEXT: cmp r0, #0
-; THUMB-NEXT: bmi .LBB4_2
-; THUMB-NEXT: @ %bb.1:
-; THUMB-NEXT: movs r0, #0
-; THUMB-NEXT: .LBB4_2:
+; THUMB-NEXT: sxth r1, r0
+; THUMB-NEXT: asrs r0, r1, #31
+; THUMB-NEXT: ands r0, r1
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: smini16_zero:
; THUMB2: @ %bb.0:
; THUMB2-NEXT: sxth r0, r0
-; THUMB2-NEXT: cmp r0, #0
-; THUMB2-NEXT: it pl
-; THUMB2-NEXT: movpl r0, #0
+; THUMB2-NEXT: and.w r0, r0, r0, asr #31
; THUMB2-NEXT: bx lr
%c = call i16 @llvm.smin.i16(i16 %a, i16 0)
ret i16 %c
@@ -144,24 +132,18 @@ declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone
define i32 @smini32_zero(i32 %a) {
; ARM-LABEL: smini32_zero:
; ARM: @ %bb.0:
-; ARM-NEXT: cmp r0, #0
-; ARM-NEXT: movpl r0, #0
+; ARM-NEXT: and r0, r0, r0, asr #31
; ARM-NEXT: bx lr
;
; THUMB-LABEL: smini32_zero:
; THUMB: @ %bb.0:
-; THUMB-NEXT: cmp r0, #0
-; THUMB-NEXT: bmi .LBB5_2
-; THUMB-NEXT: @ %bb.1:
-; THUMB-NEXT: movs r0, #0
-; THUMB-NEXT: .LBB5_2:
+; THUMB-NEXT: asrs r1, r0, #31
+; THUMB-NEXT: ands r0, r1
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: smini32_zero:
; THUMB2: @ %bb.0:
-; THUMB2-NEXT: cmp r0, #0
-; THUMB2-NEXT: it pl
-; THUMB2-NEXT: movpl r0, #0
+; THUMB2-NEXT: and.w r0, r0, r0, asr #31
; THUMB2-NEXT: bx lr
%c = call i32 @llvm.smin.i32(i32 %a, i32 0)
ret i32 %c
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
index 9194d7842a6d3..9772c8311bfbc 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll
@@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %block
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: subs r2, r1, #4
-; CHECK-NEXT: movw r3, #0
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: movt r3, #65408
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: dlstp.32 lr, r1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
index 6b5b6b2b1b677..573a9420b5278 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll
@@ -5,7 +5,6 @@ define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 %blockSize)
; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: subs.w r3, r2, #8
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .LBB0_1: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
More information about the llvm-commits mailing list