[llvm] 4f18f3f - [RISCV] Use addiw for or_is_add when or input is sign extended. (#128635)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 10:05:01 PST 2025
Author: Craig Topper
Date: 2025-02-25T10:04:57-08:00
New Revision: 4f18f3f09a744ddd05de2188592fa11533ff3054
URL: https://github.com/llvm/llvm-project/commit/4f18f3f09a744ddd05de2188592fa11533ff3054
DIFF: https://github.com/llvm/llvm-project/commit/4f18f3f09a744ddd05de2188592fa11533ff3054.diff
LOG: [RISCV] Use addiw for or_is_add when or input is sign extended. (#128635)
We prefer to emit addi instead of ori because it's more compressible, but
this can pessimize the sext.w removal pass.
If the input to the OR is known to be a sign-extended 32-bit value, we
can use addiw instead of addi, which gives more power to the sext.w
removal pass: addiw is known to produce a sign-extended value and to
consume only the lower 32 bits of its input.
Fixes #128468.
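To make the effect concrete, here is the test1 change from or-is-add.ll
below (instructions taken verbatim from the diff; the reading of why it
helps follows the commit message). For

    %a = shl i32 %x, 1
    %b = or disjoint i32 %a, 1

with %x sign extended, we previously emitted

    slliw a0, a0, 1
    addi  a0, a0, 1

and now emit

    slli  a0, a0, 1
    addiw a0, a0, 1

Since addiw reads only the lower 32 bits of its input and always produces
a sign-extended result, RISCVOptWInstrs can relax the earlier slliw to
slli and remove any sext.w that would otherwise be needed.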
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/test/CodeGen/RISCV/or-is-add.ll
llvm/test/CodeGen/RISCV/select-const.ll
llvm/test/CodeGen/RISCV/select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index a962e64581797..c775561dd2d2d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2033,6 +2033,12 @@ def : PatGprImm<binop_allwusers<and>, ANDI, u32simm12>;
def : PatGprImm<binop_allwusers<or>, ORI, u32simm12>;
def : PatGprImm<binop_allwusers<xor>, XORI, u32simm12>;
+// Select 'or' as ADDIW if the immediate bits are known to be 0 in $rs1 and
+// $rs1 is sign extended. This can improve compressibility. Using ADDIW gives
+// more power to RISCVOptWInstrs.
+def : Pat<(or_is_add 33signbits_node:$rs1, simm12:$imm),
+ (ADDIW $rs1, simm12:$imm)>;
+
/// Loads
def : LdPat<sextloadi32, LW, i64>;
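A note on the operands in the new pattern above: or_is_add is a PatFrag
defined earlier in this file that matches an `or` whose operands share no
set bits, so it can be lowered as an add; 33signbits_node matches an i64
value with at least 33 known sign bits, i.e. a sign-extended 32-bit value.
A rough sketch of the latter, assuming the usual PatLeaf/ComputeNumSignBits
idiom (the real definition lives elsewhere in RISCVInstrInfo.td and may
differ in detail):

  def 33signbits_node : PatLeaf<(i64 GPR:$src), [{
    // More than 32 sign bits means the value is a sign-extended i32.
    return CurDAG->ComputeNumSignBits(SDValue(N, 0)) > 32;
  }]>;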
diff --git a/llvm/test/CodeGen/RISCV/or-is-add.ll b/llvm/test/CodeGen/RISCV/or-is-add.ll
index 73561675b17ec..ab20312d2c4f6 100644
--- a/llvm/test/CodeGen/RISCV/or-is-add.ll
+++ b/llvm/test/CodeGen/RISCV/or-is-add.ll
@@ -11,8 +11,8 @@ define signext i32 @test1(i32 signext %x) {
;
; RV64-LABEL: test1:
; RV64: # %bb.0:
-; RV64-NEXT: slliw a0, a0, 1
-; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, 1
; RV64-NEXT: ret
%a = shl i32 %x, 1
%b = or i32 %a, 1
@@ -45,8 +45,8 @@ define signext i32 @test3(i32 signext %x) {
;
; RV64-LABEL: test3:
; RV64: # %bb.0:
-; RV64-NEXT: slliw a0, a0, 3
-; RV64-NEXT: addi a0, a0, 6
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addiw a0, a0, 6
; RV64-NEXT: ret
%a = shl i32 %x, 3
%b = add i32 %a, 6
@@ -83,7 +83,7 @@ define signext i32 @test5(i32 signext %x) {
; RV64-LABEL: test5:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 24
-; RV64-NEXT: addi a0, a0, 256
+; RV64-NEXT: addiw a0, a0, 256
; RV64-NEXT: ret
%a = lshr i32 %x, 24
%b = xor i32 %a, 256
@@ -101,7 +101,7 @@ define i64 @test6(i64 %x) {
; RV64-LABEL: test6:
; RV64: # %bb.0:
; RV64-NEXT: srli a0, a0, 54
-; RV64-NEXT: addi a0, a0, 1024
+; RV64-NEXT: addiw a0, a0, 1024
; RV64-NEXT: ret
%a = lshr i64 %x, 54
%b = xor i64 %a, 1024
@@ -121,3 +121,105 @@ define signext i32 @test7(i32 signext %x) {
%a = or disjoint i32 %x, 1
ret i32 %a
}
+
+define void @pr128468(ptr %0, i32 signext %1, i32 signext %2) {
+; RV32-LABEL: pr128468:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a3, a1, 3
+; RV32-NEXT: add a3, a0, a3
+; RV32-NEXT: lw a2, 4(a3)
+; RV32-NEXT: bgez a2, .LBB7_6
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: addi a2, a2, 1
+; RV32-NEXT: beq a2, a1, .LBB7_6
+; RV32-NEXT: # %bb.2: # %.preheader
+; RV32-NEXT: addi a3, a3, 4
+; RV32-NEXT: j .LBB7_4
+; RV32-NEXT: .LBB7_3: # in Loop: Header=BB7_4 Depth=1
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: addi a3, a3, 4
+; RV32-NEXT: beq a1, a1, .LBB7_6
+; RV32-NEXT: .LBB7_4: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: lw a4, 0(a1)
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: sw a4, 0(a3)
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: add a3, a0, a3
+; RV32-NEXT: lw a2, 4(a3)
+; RV32-NEXT: bgez a2, .LBB7_3
+; RV32-NEXT: # %bb.5: # in Loop: Header=BB7_4 Depth=1
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: addi a2, a2, 1
+; RV32-NEXT: addi a3, a3, 4
+; RV32-NEXT: bne a2, a1, .LBB7_4
+; RV32-NEXT: .LBB7_6:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: pr128468:
+; RV64: # %bb.0:
+; RV64-NEXT: slliw a2, a1, 1
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a3, a0, a3
+; RV64-NEXT: lw a4, 4(a3)
+; RV64-NEXT: bgez a4, .LBB7_6
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: addiw a2, a2, 1
+; RV64-NEXT: beq a2, a1, .LBB7_6
+; RV64-NEXT: # %bb.2: # %.preheader
+; RV64-NEXT: addi a3, a3, 4
+; RV64-NEXT: j .LBB7_4
+; RV64-NEXT: .LBB7_3: # in Loop: Header=BB7_4 Depth=1
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: addi a3, a3, 4
+; RV64-NEXT: beq a1, a1, .LBB7_6
+; RV64-NEXT: .LBB7_4: # =>This Inner Loop Header: Depth=1
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: lw a4, 0(a1)
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: slliw a2, a2, 1
+; RV64-NEXT: sw a4, 0(a3)
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a3, a0, a3
+; RV64-NEXT: lw a4, 4(a3)
+; RV64-NEXT: bgez a4, .LBB7_3
+; RV64-NEXT: # %bb.5: # in Loop: Header=BB7_4 Depth=1
+; RV64-NEXT: addiw a2, a2, 1
+; RV64-NEXT: addi a3, a3, 4
+; RV64-NEXT: bne a2, a1, .LBB7_4
+; RV64-NEXT: .LBB7_6:
+; RV64-NEXT: ret
+ %4 = shl nsw i32 %1, 1
+ %5 = or disjoint i32 %4, 1
+ %6 = sext i32 %5 to i64
+ %7 = getelementptr inbounds i32, ptr %0, i64 %6
+ %8 = load i32, ptr %7, align 4
+ %9 = icmp sgt i32 %8, -1
+ %10 = icmp eq i32 %5, %1
+ %11 = or i1 %9, %10
+ br i1 %11, label %27, label %12
+
+12: ; preds = %3, %12
+ %13 = phi i32 [ %25, %12 ], [ %5, %3 ]
+ %14 = phi ptr [ %22, %12 ], [ %7, %3 ]
+ %15 = phi i32 [ %13, %12 ], [ %1, %3 ]
+ %16 = sext i32 %15 to i64
+ %17 = getelementptr inbounds i32, ptr %0, i64 %16
+ %18 = load i32, ptr %17, align 4
+ store i32 %18, ptr %14, align 4
+ %19 = shl nsw i32 %13, 1
+ %20 = or disjoint i32 %19, 1
+ %21 = sext i32 %20 to i64
+ %22 = getelementptr inbounds i32, ptr %0, i64 %21
+ %23 = load i32, ptr %22, align 4
+ %24 = icmp slt i32 %23, 0
+ %25 = select i1 %24, i32 %20, i32 %13
+ %26 = icmp eq i32 %25, %13
+ br i1 %26, label %27, label %12
+
+27: ; preds = %12, %3
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll
index 90a81c535cef2..4538572e90cac 100644
--- a/llvm/test/CodeGen/RISCV/select-const.ll
+++ b/llvm/test/CodeGen/RISCV/select-const.ll
@@ -114,7 +114,7 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: li a1, 32
; RV64ZICOND-NEXT: czero.nez a0, a1, a0
-; RV64ZICOND-NEXT: addi a0, a0, 6
+; RV64ZICOND-NEXT: addiw a0, a0, 6
; RV64ZICOND-NEXT: ret
%1 = select i1 %a, i32 6, i32 38
ret i32 %1
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index 4405cc3f5e163..303c4ac23b313 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1884,15 +1884,22 @@ define i32 @select_cst_diff2(i1 zeroext %cond) {
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: li a1, 2
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
-; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 120
+; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 120
; RV64IMXVTCONDOPS-NEXT: ret
;
-; CHECKZICOND-LABEL: select_cst_diff2:
-; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 2
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
-; CHECKZICOND-NEXT: addi a0, a0, 120
-; CHECKZICOND-NEXT: ret
+; RV32IMZICOND-LABEL: select_cst_diff2:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: li a1, 2
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 120
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst_diff2:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: li a1, 2
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addiw a0, a0, 120
+; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 120, i32 122
ret i32 %ret
}
@@ -2074,15 +2081,22 @@ define i32 @select_cst_diff8_invert(i1 zeroext %cond) {
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: li a1, 8
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
-; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
+; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
-; CHECKZICOND-LABEL: select_cst_diff8_invert:
-; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 8
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
-; CHECKZICOND-NEXT: addi a0, a0, 6
-; CHECKZICOND-NEXT: ret
+; RV32IMZICOND-LABEL: select_cst_diff8_invert:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: li a1, 8
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 6
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst_diff8_invert:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: li a1, 8
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addiw a0, a0, 6
+; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 14
ret i32 %ret
}
@@ -2151,15 +2165,22 @@ define i32 @select_cst_diff1024_invert(i1 zeroext %cond) {
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: li a1, 1024
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
-; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
+; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
-; CHECKZICOND-LABEL: select_cst_diff1024_invert:
-; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 1024
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
-; CHECKZICOND-NEXT: addi a0, a0, 6
-; CHECKZICOND-NEXT: ret
+; RV32IMZICOND-LABEL: select_cst_diff1024_invert:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: li a1, 1024
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 6
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst_diff1024_invert:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: li a1, 1024
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addiw a0, a0, 6
+; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 1030
ret i32 %ret
}