[llvm] [LLVM][AArch64] Optimize sign bit tests with TST instruction for SIGN_EXTEND patterns (PR #158061)
guan jian via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 15 08:48:09 PDT 2025
https://github.com/rez5427 updated https://github.com/llvm/llvm-project/pull/158061
From f4fb9b8b6d7c3a5bc44e86516dffcaf3be4efc38 Mon Sep 17 00:00:00 2001
From: rez5427 <guanjian at stu.cdut.edu.cn>
Date: Tue, 9 Sep 2025 19:47:09 +0800
Subject: [PATCH 1/3] Add select setlt extend to tst
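
This teaches LowerSELECT_CC to fold a sign-bit test of a sign-extended
value into a single TST (an ANDS against the sign-bit mask) feeding a
CSEL. For illustration, the IR below is taken from the new icmp.ll test
added further down:

    %add = add i8 %x, %y
    %cmp = icmp slt i8 %add, 0
    %sel = select i1 %cmp, i32 %a, i32 %b

SelectionDAG previously materialized the sign extension and compared it
against zero; with this patch it tests bit 7 directly (the condition
code is later corrected from mi to ne in patch 2 of this series):

    ; before
    add  w8, w0, w1
    sxtb w8, w8
    cmp  w8, #0
    csel w0, w2, w3, mi

    ; after
    add  w8, w0, w1
    tst  w8, #0x80
    csel w0, w2, w3, mi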
---
.../Target/AArch64/AArch64ISelLowering.cpp | 42 +++++++++++++++
.../check-sign-bit-before-extension.ll | 10 ++--
llvm/test/CodeGen/AArch64/icmp.ll | 51 +++++++++++++++++++
llvm/test/CodeGen/AArch64/vecreduce-bool.ll | 24 ++++-----
4 files changed, 108 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e788bee6be322..2510f97d7d846 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11630,6 +11630,48 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
return DAG.getNode(ISD::AND, DL, VT, LHS, Shift);
}
+ // Check for sign bit test patterns that can use TST optimization.
+ // (SELECT_CC setlt, singn_extend_inreg, 0, tval, fval)
+ // -> TST %operand, sign_bit; CSEL
+ // (SELECT_CC setlt, singn_extend, 0, tval, fval)
+ // -> TST %operand, sign_bit; CSEL
+ if (CC == ISD::SETLT && RHSC && RHSC->isZero() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ LHS.getOpcode() == ISD::SIGN_EXTEND)) {
+
+ SDValue OriginalVal = LHS.getOperand(0);
+ EVT OriginalVT = LHS.getOpcode() == ISD::SIGN_EXTEND_INREG
+ ? cast<VTSDNode>(LHS.getOperand(1))->getVT()
+ : OriginalVal.getValueType();
+
+ // Apply TST optimization for integer types
+ if (OriginalVT.isInteger()) {
+ // Calculate the sign bit for the original type
+ unsigned BitWidth = OriginalVT.getSizeInBits();
+ APInt SignBit = APInt::getSignedMinValue(BitWidth);
+ EVT TestVT = (BitWidth <= 32) ? MVT::i32 : MVT::i64;
+ unsigned TestBitWidth = TestVT.getSizeInBits();
+ if (BitWidth < TestBitWidth) {
+ SignBit = SignBit.zext(TestBitWidth);
+ }
+
+ SDValue SignBitConst = DAG.getConstant(SignBit, DL, TestVT);
+ SDValue TestOperand = OriginalVal;
+ if (OriginalVal.getValueType() != TestVT) {
+ TestOperand = DAG.getNode(ISD::ZERO_EXTEND, DL, TestVT, OriginalVal);
+ }
+
+ SDValue TST =
+ DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(TestVT, MVT::i32),
+ TestOperand, SignBitConst);
+
+ SDValue Flags = TST.getValue(1);
+ return DAG.getNode(AArch64ISD::CSEL, DL, TVal.getValueType(), TVal,
+ FVal, DAG.getConstant(AArch64CC::MI, DL, MVT::i32),
+ Flags);
+ }
+ }
+
// Canonicalise absolute difference patterns:
// select_cc lhs, rhs, sub(lhs, rhs), sub(rhs, lhs), cc ->
// select_cc lhs, rhs, sub(lhs, rhs), neg(sub(lhs, rhs)), cc
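
For reference, here is a worked instance of the mask computation above for
the i8 case; this is a standalone illustration, not code from the patch:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // A value sign-extended from i8 has BitWidth == 8.
    APInt SignBit = APInt::getSignedMinValue(8); // 8-bit value 0x80 (INT8_MIN)
    // BitWidth 8 <= 32, so TestVT is MVT::i32 and the mask is widened:
    APInt Widened = SignBit.zext(32);            // 0x00000080
    // The ANDS node then lowers to "tst w0, #0x80".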
diff --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
index 0960c4c2a3342..b81a141b63c3a 100644
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -78,8 +78,7 @@ B:
define i32 @g_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: g_i8_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: tst w0, #0x80
; CHECK-NEXT: csel w8, w1, w2, mi
; CHECK-NEXT: add w0, w8, w0, uxtb
; CHECK-NEXT: ret
@@ -100,8 +99,7 @@ B:
define i32 @g_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: g_i16_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: tst w0, #0x8000
; CHECK-NEXT: csel w8, w1, w2, mi
; CHECK-NEXT: add w0, w8, w0, uxth
; CHECK-NEXT: ret
@@ -167,9 +165,7 @@ B:
define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
; CHECK-LABEL: g_i32_sign_extend_i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: tst w0, #0x80000000
; CHECK-NEXT: csel x8, x1, x2, mi
; CHECK-NEXT: add x0, x8, w0, uxtw
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 18665bcbeae83..6e9d13135410c 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -2093,3 +2093,54 @@ define <2 x i1> @icmp_slt_v2i64_Zero_LHS(<2 x i64> %a) {
%c = icmp slt <2 x i64> <i64 0, i64 0>, %a
ret <2 x i1> %c
}
+
+; Test TST optimization for i8 sign bit testing with cross-type select
+; This tests the pattern: icmp slt i8 %val, 0; select i1 %cmp, i32 %a, i32 %b
+; The optimization should convert sxtb+cmp to tst for sign bit testing.
+
+define i32 @i8_signbit_tst_constants(i8 %x, i8 %y) {
+; CHECK-SD-LABEL: i8_signbit_tst_constants:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w9, w0, w1
+; CHECK-SD-NEXT: mov w8, #42 // =0x2a
+; CHECK-SD-NEXT: tst w9, #0x80
+; CHECK-SD-NEXT: mov w9, #20894 // =0x519e
+; CHECK-SD-NEXT: csel w0, w9, w8, mi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i8_signbit_tst_constants:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, w1
+; CHECK-GI-NEXT: mov w9, #42 // =0x2a
+; CHECK-GI-NEXT: mov w10, #20894 // =0x519e
+; CHECK-GI-NEXT: sxtb w8, w8
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: csel w0, w10, w9, mi
+; CHECK-GI-NEXT: ret
+ %add = add i8 %x, %y
+ %cmp = icmp slt i8 %add, 0
+ %sel = select i1 %cmp, i32 20894, i32 42
+ ret i32 %sel
+}
+
+; Test i8 sign bit testing with variable select values (problematic case)
+define i32 @i8_signbit_variables(i8 %x, i8 %y, i32 %a, i32 %b) {
+; CHECK-SD-LABEL: i8_signbit_variables:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, w1
+; CHECK-SD-NEXT: tst w8, #0x80
+; CHECK-SD-NEXT: csel w0, w2, w3, mi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: i8_signbit_variables:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, w1
+; CHECK-GI-NEXT: sxtb w8, w8
+; CHECK-GI-NEXT: cmp w8, #0
+; CHECK-GI-NEXT: csel w0, w2, w3, mi
+; CHECK-GI-NEXT: ret
+ %add = add i8 %x, %y
+ %cmp = icmp slt i8 %add, 0
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index 62d41fca10db3..198428e26825f 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -26,8 +26,8 @@ define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: smov w8, v0.b[0]
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
@@ -120,8 +120,8 @@ define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: smov w8, v0.h[0]
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: umov w8, v0.h[0]
+; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
%x = icmp slt <1 x i16> %a0, zeroinitializer
@@ -305,8 +305,8 @@ define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: smov w8, v0.b[0]
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
@@ -399,8 +399,8 @@ define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: smov w8, v0.h[0]
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: umov w8, v0.h[0]
+; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
%x = icmp slt <1 x i16> %a0, zeroinitializer
@@ -584,8 +584,8 @@ define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: smov w8, v0.b[0]
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: tst w8, #0x80
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
@@ -679,8 +679,8 @@ define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: smov w8, v0.h[0]
-; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: umov w8, v0.h[0]
+; CHECK-NEXT: tst w8, #0x8000
; CHECK-NEXT: csel w0, w0, w1, mi
; CHECK-NEXT: ret
%x = icmp slt <1 x i16> %a0, zeroinitializer
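
One note on the vecreduce-bool.ll diffs above: once the comparison is a pure
bit test, the lane extract no longer has to sign-extend, so the
sign-extending smov relaxes to a zero-extending umov:

    umov w8, v0.b[0]  // zero-extending lane move; only bits [7:0] matter
    tst  w8, #0x80    // test the sign bit of the original i8 lane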
From d52b40c7a04a3db7a467cef5ec4595752791567f Mon Sep 17 00:00:00 2001
From: rez5427 <guanjian at stu.cdut.edu.cn>
Date: Sat, 13 Sep 2025 14:45:21 +0800
Subject: [PATCH 2/3] Use flag NE instead of MI
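
This is a correctness fix for the condition code. With a mask such as 0x80
or 0x8000, the ANDS result is either zero or the mask itself, so bit 31 of
the result (the source of the N flag) is never set and the mi condition
from the first patch can never be taken for those sub-word masks. ne (Z
clear) is the condition that actually observes the tested bit:

    tst  w0, #0x80       // alias of "ands wzr, w0, #0x80": result is 0x0 or 0x80
    csel w8, w1, w2, ne  // Z == 0 exactly when bit 7 of w0 was set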
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 6 +++---
.../AArch64/check-sign-bit-before-extension.ll | 6 +++---
llvm/test/CodeGen/AArch64/icmp.ll | 4 ++--
llvm/test/CodeGen/AArch64/vecreduce-bool.ll | 12 ++++++------
4 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2510f97d7d846..b0cc6fc916a8c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11631,9 +11631,9 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
}
// Check for sign bit test patterns that can use TST optimization.
- // (SELECT_CC setlt, singn_extend_inreg, 0, tval, fval)
+ // (SELECT_CC setlt, sign_extend_inreg, 0, tval, fval)
// -> TST %operand, sign_bit; CSEL
- // (SELECT_CC setlt, singn_extend, 0, tval, fval)
+ // (SELECT_CC setlt, sign_extend, 0, tval, fval)
// -> TST %operand, sign_bit; CSEL
if (CC == ISD::SETLT && RHSC && RHSC->isZero() && LHS.hasOneUse() &&
(LHS.getOpcode() == ISD::SIGN_EXTEND_INREG ||
@@ -11667,7 +11667,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
SDValue Flags = TST.getValue(1);
return DAG.getNode(AArch64ISD::CSEL, DL, TVal.getValueType(), TVal,
- FVal, DAG.getConstant(AArch64CC::MI, DL, MVT::i32),
+ FVal, DAG.getConstant(AArch64CC::NE, DL, MVT::i32),
Flags);
}
}
diff --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
index b81a141b63c3a..a56d5b1b49b38 100644
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -79,7 +79,7 @@ define i32 @g_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: g_i8_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w0, #0x80
-; CHECK-NEXT: csel w8, w1, w2, mi
+; CHECK-NEXT: csel w8, w1, w2, ne
; CHECK-NEXT: add w0, w8, w0, uxtb
; CHECK-NEXT: ret
entry:
@@ -100,7 +100,7 @@ define i32 @g_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
; CHECK-LABEL: g_i16_sign_extend_inreg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w0, #0x8000
-; CHECK-NEXT: csel w8, w1, w2, mi
+; CHECK-NEXT: csel w8, w1, w2, ne
; CHECK-NEXT: add w0, w8, w0, uxth
; CHECK-NEXT: ret
entry:
@@ -166,7 +166,7 @@ define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
; CHECK-LABEL: g_i32_sign_extend_i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w0, #0x80000000
-; CHECK-NEXT: csel x8, x1, x2, mi
+; CHECK-NEXT: csel x8, x1, x2, ne
; CHECK-NEXT: add x0, x8, w0, uxtw
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 6e9d13135410c..7195e2b2f1255 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -2105,7 +2105,7 @@ define i32 @i8_signbit_tst_constants(i8 %x, i8 %y) {
; CHECK-SD-NEXT: mov w8, #42 // =0x2a
; CHECK-SD-NEXT: tst w9, #0x80
; CHECK-SD-NEXT: mov w9, #20894 // =0x519e
-; CHECK-SD-NEXT: csel w0, w9, w8, mi
+; CHECK-SD-NEXT: csel w0, w9, w8, ne
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: i8_signbit_tst_constants:
@@ -2129,7 +2129,7 @@ define i32 @i8_signbit_variables(i8 %x, i8 %y, i32 %a, i32 %b) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add w8, w0, w1
; CHECK-SD-NEXT: tst w8, #0x80
-; CHECK-SD-NEXT: csel w0, w2, w3, mi
+; CHECK-SD-NEXT: csel w0, w2, w3, ne
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: i8_signbit_variables:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index 198428e26825f..19e1aa5d152ce 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -28,7 +28,7 @@ define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.b[0]
; CHECK-NEXT: tst w8, #0x80
-; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x)
@@ -122,7 +122,7 @@ define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[0]
; CHECK-NEXT: tst w8, #0x8000
-; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <1 x i16> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x)
@@ -307,7 +307,7 @@ define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.b[0]
; CHECK-NEXT: tst w8, #0x80
-; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x)
@@ -401,7 +401,7 @@ define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[0]
; CHECK-NEXT: tst w8, #0x8000
-; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <1 x i16> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x)
@@ -586,7 +586,7 @@ define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.b[0]
; CHECK-NEXT: tst w8, #0x80
-; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x)
@@ -681,7 +681,7 @@ define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w8, v0.h[0]
; CHECK-NEXT: tst w8, #0x8000
-; CHECK-NEXT: csel w0, w0, w1, mi
+; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <1 x i16> %a0, zeroinitializer
%y = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %x)
From 3762d7b7623aeb4984e86cce509c9cef354f90a3 Mon Sep 17 00:00:00 2001
From: rez5427 <guanjian at stu.cdut.edu.cn>
Date: Mon, 15 Sep 2025 23:46:14 +0800
Subject: [PATCH 3/3] Use lookThroughSignExtension
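
For context, lookThroughSignExtension is an existing static helper in
AArch64ISelLowering.cpp (it already serves the TB(N)Z sign-bit lowering):
it peels a sign extension off a value and returns the pre-extension value
together with the bit position of that value's sign bit. A rough sketch of
its shape, for orientation only (see the file itself for the authoritative
definition):

    static std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
      if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
        return {Val.getOperand(0),
                cast<VTSDNode>(Val.getOperand(1))->getVT().getSizeInBits() - 1};
      if (Val.getOpcode() == ISD::SIGN_EXTEND)
        return {Val.getOperand(0),
                Val.getOperand(0).getValueType().getSizeInBits() - 1};
      return {Val, Val.getValueType().getSizeInBits() - 1};
    }

So for a value sign-extended from i8 the pair is {operand, 7}, and
1ULL << 7 reproduces the 0x80 TST immediate computed by the earlier
version of this code.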
---
.../Target/AArch64/AArch64ISelLowering.cpp | 42 +++++--------------
1 file changed, 11 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b0cc6fc916a8c..df824655a6b27 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11639,37 +11639,17 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
(LHS.getOpcode() == ISD::SIGN_EXTEND_INREG ||
LHS.getOpcode() == ISD::SIGN_EXTEND)) {
- SDValue OriginalVal = LHS.getOperand(0);
- EVT OriginalVT = LHS.getOpcode() == ISD::SIGN_EXTEND_INREG
- ? cast<VTSDNode>(LHS.getOperand(1))->getVT()
- : OriginalVal.getValueType();
-
- // Apply TST optimization for integer types
- if (OriginalVT.isInteger()) {
- // Calculate the sign bit for the original type
- unsigned BitWidth = OriginalVT.getSizeInBits();
- APInt SignBit = APInt::getSignedMinValue(BitWidth);
- EVT TestVT = (BitWidth <= 32) ? MVT::i32 : MVT::i64;
- unsigned TestBitWidth = TestVT.getSizeInBits();
- if (BitWidth < TestBitWidth) {
- SignBit = SignBit.zext(TestBitWidth);
- }
-
- SDValue SignBitConst = DAG.getConstant(SignBit, DL, TestVT);
- SDValue TestOperand = OriginalVal;
- if (OriginalVal.getValueType() != TestVT) {
- TestOperand = DAG.getNode(ISD::ZERO_EXTEND, DL, TestVT, OriginalVal);
- }
-
- SDValue TST =
- DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(TestVT, MVT::i32),
- TestOperand, SignBitConst);
-
- SDValue Flags = TST.getValue(1);
- return DAG.getNode(AArch64ISD::CSEL, DL, TVal.getValueType(), TVal,
- FVal, DAG.getConstant(AArch64CC::NE, DL, MVT::i32),
- Flags);
- }
+ uint64_t SignBitPos;
+ std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
+ EVT TestVT = LHS.getValueType();
+ SDValue SignBitConst = DAG.getConstant(1ULL << SignBitPos, DL, TestVT);
+ SDValue TST =
+ DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(TestVT, MVT::i32),
+ LHS, SignBitConst);
+
+ SDValue Flags = TST.getValue(1);
+ return DAG.getNode(AArch64ISD::CSEL, DL, TVal.getValueType(), TVal, FVal,
+ DAG.getConstant(AArch64CC::NE, DL, MVT::i32), Flags);
}
// Canonicalise absolute difference patterns: