[llvm] 22c590b - [RISCV][ISel] Optimize setcc with mask test idioms (#147015)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 22:02:01 PDT 2025
Author: Yingwei Zheng
Date: 2025-07-05T13:01:57+08:00
New Revision: 22c590bd3c75ccb6b443e074e4df2c82771a96eb
URL: https://github.com/llvm/llvm-project/commit/22c590bd3c75ccb6b443e074e4df2c82771a96eb
DIFF: https://github.com/llvm/llvm-project/commit/22c590bd3c75ccb6b443e074e4df2c82771a96eb.diff
LOG: [RISCV][ISel] Optimize setcc with mask test idioms (#147015)
As InstCombine converts more comparisons/differences of pointers into
comparisons/differences of their offsets, the mask test idiom `icmp eq/ne (and X,
Mask), 0` may become more common in real-world programs.
This patch eliminates unnecessary srli instructions for this pattern. We
have a similar optimization for `RISCVISD::SELECT_CC/BR_CC`:
https://github.com/llvm/llvm-project/blob/a89e232058a29260eb9bfe77b862715ce875f962/llvm/lib/Target/RISCV/RISCVISelLowering.cpp#L2416-L2446
However, I cannot reuse the function `translateSetCCForBranch` here, as
doing so causes regressions through other DAGCombiner folds:
https://github.com/llvm/llvm-project/compare/main...dtcxzyw:llvm-project:rv-mask-test.
So this patch defers the transformation to ISel.
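For example, with a 20-trailing-ones mask on RV32, the eq form of the idiom
lowers as follows (a sketch mirroring the i32 tests added below; the "before"
sequence is reconstructed from the srli this patch removes):

    define i32 @mask_test_eq(i32 %x) {
      %y = and i32 %x, 1048575       ; 0xFFFFF, 20 trailing ones
      %cmp = icmp eq i32 %y, 0
      %ext = zext i1 %cmp to i32
      ret i32 %ext
    }

    ; before: slli a0, a0, 12        ; after: slli a0, a0, 12
    ;         srli a0, a0, 12        ;        seqz a0, a0
    ;         seqz a0, a0

The shift amount is XLen minus the number of trailing ones in the mask
(32 - 20 = 12 here): the slli discards the bits above the mask, so the masked
value is zero exactly when the shifted value is zero, and the compensating
srli before the seqz/snez is unnecessary.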
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
llvm/test/CodeGen/RISCV/float-intrinsics.ll
llvm/test/CodeGen/RISCV/i32-icmp.ll
llvm/test/CodeGen/RISCV/i64-icmp.ll
llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index d7a347f47595b..8f8fb6eba9a62 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1615,6 +1615,10 @@ def riscv_seteq : ComplexPattern<XLenVT, 1, "selectSETEQ", [setcc]>;
def : Pat<(riscv_seteq (XLenVT GPR:$rs1)), (SLTIU GPR:$rs1, 1)>;
def : Pat<(riscv_setne (XLenVT GPR:$rs1)), (SLTU (XLenVT X0), GPR:$rs1)>;
def : Pat<(XLenVT (setne (XLenVT GPR:$rs1), -1)), (SLTIU GPR:$rs1, -1)>;
+def : Pat<(XLenVT (seteq (XLenVT (and GPR:$rs, immop_oneuse<TrailingOnesMask>:$mask)), 0)),
+ (SLTIU (XLenVT (SLLI GPR:$rs, (XLenSubTrailingOnes imm:$mask))), 1)>;
+def : Pat<(XLenVT (setne (XLenVT (and GPR:$rs, immop_oneuse<TrailingOnesMask>:$mask)), 0)),
+ (SLTU (XLenVT X0), (XLenVT (SLLI GPR:$rs, (XLenSubTrailingOnes imm:$mask))))>;
def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
index 88413291c26cd..7f387a763b6da 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
@@ -972,19 +972,19 @@ define i1 @fpclass(float %x) {
; RV32I-NEXT: slli a2, a0, 1
; RV32I-NEXT: lui a3, 2048
; RV32I-NEXT: lui a4, 1046528
-; RV32I-NEXT: srli a2, a2, 1
+; RV32I-NEXT: srli a5, a2, 1
; RV32I-NEXT: addi a3, a3, -1
-; RV32I-NEXT: addi a5, a2, -1
+; RV32I-NEXT: xor a0, a0, a5
+; RV32I-NEXT: xor a6, a5, a1
+; RV32I-NEXT: sltu a1, a1, a5
+; RV32I-NEXT: add a4, a5, a4
+; RV32I-NEXT: addi a5, a5, -1
; RV32I-NEXT: sltu a3, a5, a3
; RV32I-NEXT: lui a5, 520192
-; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: add a4, a2, a4
; RV32I-NEXT: sltu a4, a4, a5
-; RV32I-NEXT: xor a5, a2, a1
-; RV32I-NEXT: sltu a1, a1, a2
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: seqz a5, a6
; RV32I-NEXT: and a3, a3, a0
; RV32I-NEXT: or a2, a2, a5
; RV32I-NEXT: and a0, a4, a0
@@ -1000,19 +1000,19 @@ define i1 @fpclass(float %x) {
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: li a3, 1
; RV64I-NEXT: lui a4, 2048
-; RV64I-NEXT: lui a5, 520192
-; RV64I-NEXT: srli a2, a2, 33
-; RV64I-NEXT: addi a6, a4, -1
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: subw a3, a2, a3
-; RV64I-NEXT: sltu a3, a3, a6
-; RV64I-NEXT: xor a6, a2, a1
-; RV64I-NEXT: sltu a1, a1, a2
-; RV64I-NEXT: subw a4, a2, a4
+; RV64I-NEXT: srli a5, a2, 33
+; RV64I-NEXT: xor a0, a0, a5
+; RV64I-NEXT: subw a3, a5, a3
+; RV64I-NEXT: xor a6, a5, a1
+; RV64I-NEXT: sltu a1, a1, a5
+; RV64I-NEXT: subw a5, a5, a4
+; RV64I-NEXT: addi a4, a4, -1
+; RV64I-NEXT: sltu a3, a3, a4
+; RV64I-NEXT: lui a4, 520192
; RV64I-NEXT: seqz a2, a2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: seqz a6, a6
-; RV64I-NEXT: sltu a4, a4, a5
+; RV64I-NEXT: sltu a4, a5, a4
; RV64I-NEXT: and a3, a3, a0
; RV64I-NEXT: or a2, a2, a6
; RV64I-NEXT: or a1, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index be9ddc68ce667..ed50042f54ab5 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -1625,18 +1625,18 @@ define i1 @fpclass(float %x) {
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 1
; RV32I-NEXT: lui a2, 2048
-; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: lui a3, 522240
; RV32I-NEXT: lui a4, 1046528
-; RV32I-NEXT: srli a1, a1, 1
+; RV32I-NEXT: srli a5, a1, 1
; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: addi a5, a1, -1
+; RV32I-NEXT: xor a6, a5, a3
+; RV32I-NEXT: slt a3, a3, a5
+; RV32I-NEXT: add a4, a5, a4
+; RV32I-NEXT: addi a5, a5, -1
; RV32I-NEXT: sltu a2, a5, a2
-; RV32I-NEXT: xor a5, a1, a3
-; RV32I-NEXT: slt a3, a3, a1
-; RV32I-NEXT: add a4, a1, a4
+; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: seqz a5, a5
+; RV32I-NEXT: seqz a5, a6
; RV32I-NEXT: srli a4, a4, 24
; RV32I-NEXT: and a2, a2, a0
; RV32I-NEXT: or a1, a1, a5
@@ -1649,29 +1649,29 @@ define i1 @fpclass(float %x) {
;
; RV64I-LABEL: fpclass:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: slli a0, a0, 33
+; RV64I-NEXT: slli a1, a0, 33
; RV64I-NEXT: lui a2, 2048
; RV64I-NEXT: lui a3, 522240
; RV64I-NEXT: lui a4, 1046528
-; RV64I-NEXT: srli a0, a0, 33
+; RV64I-NEXT: srli a5, a1, 33
; RV64I-NEXT: addi a2, a2, -1
-; RV64I-NEXT: slti a1, a1, 0
-; RV64I-NEXT: addi a5, a0, -1
+; RV64I-NEXT: xor a6, a5, a3
+; RV64I-NEXT: slt a3, a3, a5
+; RV64I-NEXT: add a4, a5, a4
+; RV64I-NEXT: addi a5, a5, -1
; RV64I-NEXT: sltu a2, a5, a2
-; RV64I-NEXT: xor a5, a0, a3
-; RV64I-NEXT: slt a3, a3, a0
-; RV64I-NEXT: add a4, a0, a4
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a5, a5
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: slti a0, a0, 0
+; RV64I-NEXT: seqz a1, a1
+; RV64I-NEXT: seqz a5, a6
; RV64I-NEXT: srliw a4, a4, 24
-; RV64I-NEXT: and a2, a2, a1
-; RV64I-NEXT: or a0, a0, a5
+; RV64I-NEXT: and a2, a2, a0
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: sltiu a4, a4, 127
-; RV64I-NEXT: or a0, a0, a2
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: or a1, a1, a3
+; RV64I-NEXT: and a0, a4, a0
+; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
%cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639)
ret i1 %cmp
diff --git a/llvm/test/CodeGen/RISCV/i32-icmp.ll b/llvm/test/CodeGen/RISCV/i32-icmp.ll
index 6e3e0fe39cca7..53892f9497bba 100644
--- a/llvm/test/CodeGen/RISCV/i32-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/i32-icmp.ll
@@ -1136,3 +1136,57 @@ define i32 @icmp_sle_constant_neg_2050(i32 %a) nounwind {
%2 = zext i1 %1 to i32
ret i32 %2
}
+
+define i32 @mask_test_eq(i32 %x) nounwind {
+; RV32I-LABEL: mask_test_eq:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 12
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCILIA-LABEL: mask_test_eq:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: slli a0, a0, 12
+; RV32XQCILIA-NEXT: seqz a0, a0
+; RV32XQCILIA-NEXT: ret
+ %y = and i32 %x, 1048575
+ %cmp = icmp eq i32 %y, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @mask_test_ne(i32 %x) nounwind {
+; RV32I-LABEL: mask_test_ne:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 12
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCILIA-LABEL: mask_test_ne:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: slli a0, a0, 12
+; RV32XQCILIA-NEXT: snez a0, a0
+; RV32XQCILIA-NEXT: ret
+ %y = and i32 %x, 1048575
+ %cmp = icmp ne i32 %y, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+define i32 @mask_test_eq_simm12(i32 %x) nounwind {
+; RV32I-LABEL: mask_test_eq_simm12:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 3
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: ret
+;
+; RV32XQCILIA-LABEL: mask_test_eq_simm12:
+; RV32XQCILIA: # %bb.0:
+; RV32XQCILIA-NEXT: andi a0, a0, 3
+; RV32XQCILIA-NEXT: seqz a0, a0
+; RV32XQCILIA-NEXT: ret
+ %y = and i32 %x, 3
+ %cmp = icmp eq i32 %y, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
diff --git a/llvm/test/CodeGen/RISCV/i64-icmp.ll b/llvm/test/CodeGen/RISCV/i64-icmp.ll
index 49103231a075f..837987d8b9162 100644
--- a/llvm/test/CodeGen/RISCV/i64-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/i64-icmp.ll
@@ -767,4 +767,56 @@ define i64 @icmp_ne_zext_inreg_umin(i64 %a) nounwind {
%4 = zext i1 %3 to i64
ret i64 %4
}
+
+define i64 @mask_test_eq(i64 %x) nounwind {
+; RV64I-LABEL: mask_test_eq:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+ %y = and i64 %x, 4611686018427387903
+ %cmp = icmp eq i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
+define i64 @mask_test_ne(i64 %x) nounwind {
+; RV64I-LABEL: mask_test_ne:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: snez a0, a0
+; RV64I-NEXT: ret
+ %y = and i64 %x, 4611686018427387903
+ %cmp = icmp ne i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
+define i64 @mask_test_eq_simm12(i64 %x) nounwind {
+; RV64I-LABEL: mask_test_eq_simm12:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 3
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: ret
+ %y = and i64 %x, 3
+ %cmp = icmp eq i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
+define i64 @mask_test_eq_multiuse(i64 %x, ptr %p) nounwind {
+; RV64I-LABEL: mask_test_eq_multiuse:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: srli a2, a0, 2
+; RV64I-NEXT: seqz a0, a0
+; RV64I-NEXT: sd a2, 0(a1)
+; RV64I-NEXT: ret
+ %y = and i64 %x, 4611686018427387903
+ store i64 %y, ptr %p, align 8
+ %cmp = icmp eq i64 %y, 0
+ %ext = zext i1 %cmp to i64
+ ret i64 %ext
+}
+
declare i64 @llvm.umin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 98c897084ab49..47b90a006a249 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -594,7 +594,6 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: slli a0, a2, 16
-; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sh a2, 0(a1)
; RV32-NEXT: ret
@@ -603,7 +602,6 @@ define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: slli a0, a2, 48
-; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: sh a2, 0(a1)
; RV64-NEXT: ret
@@ -759,10 +757,9 @@ define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV64-LABEL: uaddo_i42_increment_illegal_type:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
-; RV64-NEXT: slli a0, a2, 22
-; RV64-NEXT: srli a3, a0, 22
+; RV64-NEXT: slli a3, a2, 22
; RV64-NEXT: seqz a0, a3
-; RV64-NEXT: srli a3, a3, 32
+; RV64-NEXT: srli a3, a3, 54
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: sh a3, 4(a1)
; RV64-NEXT: ret