[llvm] [LegalizeTypes][RISCV][LoongArch] Optimize promotion of ucmp. (PR #101366)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 10:15:06 PDT 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/101366
ucmp can be promoted with either sext or zext. RISC-V and LoongArch prefer sext for promoting i32 to i64 unless the inputs are known to be zero extended already.
This patch uses the existing SExtOrZExtPromotedOperands function that is used by SETCC promotion to intelligently handle this.
>From 7f6179f89c6b6ab50e841f934c3b6d9fa3336a7e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 31 Jul 2024 10:11:32 -0700
Subject: [PATCH] [LegalizeTypes][RISCV][LoongArch] Optimize promotion of ucmp.
ucmp can be promoted with either sext or zext. RISC-V prefers sext
for promoting i32 to i64 unless the inputs are known to be zero
extended already.
This patch uses the existing SExtOrZExtPromotedOperands function
that is used by SETCC promotion to intelligently handle this.
---
.../SelectionDAG/LegalizeIntegerTypes.cpp | 15 +++--
llvm/test/CodeGen/LoongArch/ucmp.ll | 8 +--
llvm/test/CodeGen/RISCV/ucmp.ll | 60 ++++++++++++++-----
3 files changed, 59 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 33a53dfc81379..b1ada66aa9aeb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2294,12 +2294,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_CMP(SDNode *N) {
- SDValue LHS = N->getOpcode() == ISD::UCMP
- ? ZExtPromotedInteger(N->getOperand(0))
- : SExtPromotedInteger(N->getOperand(0));
- SDValue RHS = N->getOpcode() == ISD::UCMP
- ? ZExtPromotedInteger(N->getOperand(1))
- : SExtPromotedInteger(N->getOperand(1));
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (N->getOpcode() == ISD::SCMP) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ SExtOrZExtPromotedOperands(LHS, RHS);
+ }
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS), 0);
}
diff --git a/llvm/test/CodeGen/LoongArch/ucmp.ll b/llvm/test/CodeGen/LoongArch/ucmp.ll
index 548c5bd0db72b..b91d3bf15d812 100644
--- a/llvm/test/CodeGen/LoongArch/ucmp.ll
+++ b/llvm/test/CodeGen/LoongArch/ucmp.ll
@@ -26,8 +26,8 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp.8.32:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 0
-; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT: addi.w $a1, $a1, 0
+; CHECK-NEXT: addi.w $a0, $a0, 0
; CHECK-NEXT: sltu $a2, $a0, $a1
; CHECK-NEXT: sltu $a0, $a1, $a0
; CHECK-NEXT: sub.d $a0, $a0, $a2
@@ -71,8 +71,8 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp.32.32:
; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a1, $a1, 31, 0
-; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT: addi.w $a1, $a1, 0
+; CHECK-NEXT: addi.w $a0, $a0, 0
; CHECK-NEXT: sltu $a2, $a0, $a1
; CHECK-NEXT: sltu $a0, $a1, $a0
; CHECK-NEXT: sub.d $a0, $a0, $a2
diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll
index 026340ede1f90..c74bc6838ff7d 100644
--- a/llvm/test/CodeGen/RISCV/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/ucmp.ll
@@ -48,10 +48,8 @@ define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: ucmp.8.32:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: srli a1, a1, 32
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a1, a1
+; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sltu a2, a0, a1
; RV64I-NEXT: sltu a0, a1, a0
; RV64I-NEXT: sub a0, a0, a2
@@ -164,10 +162,44 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: ucmp.32.32:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: srli a1, a1, 32
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
+; RV64I-NEXT: sext.w a1, a1
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind {
+; RV32I-LABEL: ucmp.32.32_sext:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.32.32_sext:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sltu a2, a0, a1
+; RV64I-NEXT: sltu a0, a1, a0
+; RV64I-NEXT: sub a0, a0, a2
+; RV64I-NEXT: ret
+ %1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
+ ret i32 %1
+}
+
+define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind {
+; RV32I-LABEL: ucmp.32.32_zext:
+; RV32I: # %bb.0:
+; RV32I-NEXT: sltu a2, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: sub a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ucmp.32.32_zext:
+; RV64I: # %bb.0:
; RV64I-NEXT: sltu a2, a0, a1
; RV64I-NEXT: sltu a0, a1, a0
; RV64I-NEXT: sub a0, a0, a2
@@ -179,13 +211,13 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind {
define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: ucmp.32.64:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB6_2
+; RV32I-NEXT: beq a1, a3, .LBB8_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a1, a3
; RV32I-NEXT: sltu a0, a3, a1
; RV32I-NEXT: sub a0, a0, a4
; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: .LBB8_2:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: sub a0, a0, a4
@@ -204,15 +236,15 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind {
; RV32I-LABEL: ucmp.64.64:
; RV32I: # %bb.0:
-; RV32I-NEXT: beq a1, a3, .LBB7_2
+; RV32I-NEXT: beq a1, a3, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a4, a1, a3
; RV32I-NEXT: sltu a0, a3, a1
-; RV32I-NEXT: j .LBB7_3
-; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: j .LBB9_3
+; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: sltu a4, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: .LBB7_3:
+; RV32I-NEXT: .LBB9_3:
; RV32I-NEXT: sub a0, a0, a4
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: ret
More information about the llvm-commits
mailing list