[llvm] f40682a - [CSKY] Optimize subtraction with SUBI32/SUBI16
Ben Shi via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 29 20:53:15 PDT 2023
Author: Ben Shi
Date: 2023-06-30T11:33:20+08:00
New Revision: f40682a930d6812c930f90495933f9e3dfb9fcd3
URL: https://github.com/llvm/llvm-project/commit/f40682a930d6812c930f90495933f9e3dfb9fcd3
DIFF: https://github.com/llvm/llvm-project/commit/f40682a930d6812c930f90495933f9e3dfb9fcd3.diff
LOG: [CSKY] Optimize subtraction with SUBI32/SUBI16
Reviewed By: zixuan-wu
Differential Revision: https://reviews.llvm.org/D153326
Added:
Modified:
llvm/lib/Target/CSKY/CSKYInstrInfo.td
llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
llvm/test/CodeGen/CSKY/base-i.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 77b14102ed8c7b..ab053fae9f302d 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -102,6 +102,14 @@ class oimm<int num> : Operand<i32>,
let DecoderMethod = "decodeOImmOperand<"#num#">";
}
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+class oimm_neg<int num> : Operand<i32>,
+ ImmLeaf<i32, "return isUInt<"#num#">(-Imm - 1);"> {
+}
+
class uimm<int num, int shift = 0> : Operand<i32>,
ImmLeaf<i32, "return isShiftedUInt<"#num#", "#shift#">(Imm);"> {
let EncoderMethod = "getImmOpValue<"#shift#">";
@@ -259,6 +267,23 @@ def oimm16 : oimm<16> {
}];
}
+def oimm8_neg : oimm_neg<8> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<8>(-Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def oimm12_neg : oimm_neg<12> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<12>(-Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+
def nimm12 : nimm<12>;
def uimm1 : uimm<1>;
@@ -518,6 +543,9 @@ let Predicates = [iHasE2] in {
let Size = 8 in
def RSUBI32 : CSKYPseudo<(outs GPR:$rd), (ins GPR:$rx, uimm12:$imm12), "rsubi32 $rd, $rx, $imm12", []>;
+ def : Pat<(add GPR:$rs1, (oimm12_neg:$im)),
+ (SUBI32 GPR:$rs1, (imm_neg_XFORM oimm12_neg:$im))>;
+
def LSL32 : R_YXZ_SP_F1<0x10, 0x1,
BinOpFrag<(shl node:$LHS, node:$RHS)>, "lsl32">;
def LSR32 : R_YXZ_SP_F1<0x10, 0x2,
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
index e84d73d36ce3b5..3e248019d73f9e 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -97,6 +97,9 @@ let Constraints = "$rZ = $rz", isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def SUBI16 : I16_Z_8<0b101, (ins mGPR:$rZ, oimm8:$imm8), "subi16\t$rz, $imm8">;
}
+def : Pat<(add GPR:$rs1, (oimm8_neg:$im)),
+ (SUBI16 GPR:$rs1, (imm_neg_XFORM oimm8_neg:$im))>;
+
let isAdd = 1 in
def ADDI16ZSP : I16_Z_8<0b011, (ins GPRSP:$sp, uimm8_2:$imm8),
"addi16\t$rz, $sp, $imm8">;
diff --git a/llvm/test/CodeGen/CSKY/base-i.ll b/llvm/test/CodeGen/CSKY/base-i.ll
index 6e612cb3f5c4b5..65e20147ec66e5 100644
--- a/llvm/test/CodeGen/CSKY/base-i.ll
+++ b/llvm/test/CodeGen/CSKY/base-i.ll
@@ -40,6 +40,52 @@ entry:
ret i32 %add
}
+define i32 @addRI_256(i32 %x) {
+; CHECK-LABEL: addRI_256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi16 a0, 256
+; CHECK-NEXT: rts16
+;
+; GENERIC-LABEL: addRI_256:
+; GENERIC: # %bb.0: # %entry
+; GENERIC-NEXT: .cfi_def_cfa_offset 0
+; GENERIC-NEXT: subi16 sp, sp, 4
+; GENERIC-NEXT: .cfi_def_cfa_offset 4
+; GENERIC-NEXT: addi16 a0, 256
+; GENERIC-NEXT: addi16 sp, sp, 4
+; GENERIC-NEXT: rts16
+entry:
+ %add = add nsw i32 %x, 256
+ ret i32 %add
+}
+
+define i32 @addRI_4096(i32 %x) {
+; CHECK-LABEL: addRI_4096:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi32 a0, a0, 4096
+; CHECK-NEXT: rts16
+;
+; GENERIC-LABEL: addRI_4096:
+; GENERIC: # %bb.0: # %entry
+; GENERIC-NEXT: .cfi_def_cfa_offset 0
+; GENERIC-NEXT: subi16 sp, sp, 4
+; GENERIC-NEXT: .cfi_def_cfa_offset 4
+; GENERIC-NEXT: movi16 a1, 0
+; GENERIC-NEXT: lsli16 a2, a1, 24
+; GENERIC-NEXT: lsli16 a3, a1, 16
+; GENERIC-NEXT: or16 a3, a2
+; GENERIC-NEXT: movi16 a2, 16
+; GENERIC-NEXT: lsli16 a2, a2, 8
+; GENERIC-NEXT: or16 a2, a3
+; GENERIC-NEXT: or16 a2, a1
+; GENERIC-NEXT: addu16 a0, a0, a2
+; GENERIC-NEXT: addi16 sp, sp, 4
+; GENERIC-NEXT: rts16
+entry:
+ %add = add nsw i32 %x, 4096
+ ret i32 %add
+}
+
define i32 @addRI_X(i32 %x) {
; CHECK-LABEL: addRI_X:
; CHECK: # %bb.0: # %entry
@@ -218,9 +264,7 @@ entry:
define i32 @subRI(i32 %x) {
; CHECK-LABEL: subRI:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movih32 a1, 65535
-; CHECK-NEXT: ori32 a1, a1, 65526
-; CHECK-NEXT: addu16 a0, a1
+; CHECK-NEXT: subi16 a0, 10
; CHECK-NEXT: rts16
;
; GENERIC-LABEL: subRI:
@@ -228,19 +272,58 @@ define i32 @subRI(i32 %x) {
; GENERIC-NEXT: .cfi_def_cfa_offset 0
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 4
+; GENERIC-NEXT: subi16 a0, 10
+; GENERIC-NEXT: addi16 sp, sp, 4
+; GENERIC-NEXT: rts16
+entry:
+ %sub = sub nsw i32 %x, 10
+ ret i32 %sub
+}
+
+define i32 @subRI_256(i32 %x) {
+; CHECK-LABEL: subRI_256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subi16 a0, 256
+; CHECK-NEXT: rts16
+;
+; GENERIC-LABEL: subRI_256:
+; GENERIC: # %bb.0: # %entry
+; GENERIC-NEXT: .cfi_def_cfa_offset 0
+; GENERIC-NEXT: subi16 sp, sp, 4
+; GENERIC-NEXT: .cfi_def_cfa_offset 4
+; GENERIC-NEXT: subi16 a0, 256
+; GENERIC-NEXT: addi16 sp, sp, 4
+; GENERIC-NEXT: rts16
+entry:
+ %sub = sub nsw i32 %x, 256
+ ret i32 %sub
+}
+
+define i32 @subRI_4096(i32 %x) {
+; CHECK-LABEL: subRI_4096:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subi32 a0, a0, 4096
+; CHECK-NEXT: rts16
+;
+; GENERIC-LABEL: subRI_4096:
+; GENERIC: # %bb.0: # %entry
+; GENERIC-NEXT: .cfi_def_cfa_offset 0
+; GENERIC-NEXT: subi16 sp, sp, 4
+; GENERIC-NEXT: .cfi_def_cfa_offset 4
; GENERIC-NEXT: movi16 a1, 255
; GENERIC-NEXT: lsli16 a2, a1, 24
-; GENERIC-NEXT: lsli16 a3, a1, 16
-; GENERIC-NEXT: or16 a3, a2
-; GENERIC-NEXT: lsli16 a1, a1, 8
-; GENERIC-NEXT: or16 a1, a3
-; GENERIC-NEXT: movi16 a2, 246
+; GENERIC-NEXT: lsli16 a1, a1, 16
+; GENERIC-NEXT: or16 a1, a2
+; GENERIC-NEXT: movi16 a2, 240
+; GENERIC-NEXT: lsli16 a2, a2, 8
; GENERIC-NEXT: or16 a2, a1
-; GENERIC-NEXT: addu16 a0, a0, a2
+; GENERIC-NEXT: movi16 a1, 0
+; GENERIC-NEXT: or16 a1, a2
+; GENERIC-NEXT: addu16 a0, a0, a1
; GENERIC-NEXT: addi16 sp, sp, 4
; GENERIC-NEXT: rts16
entry:
- %sub = sub nsw i32 %x, 10
+ %sub = sub nsw i32 %x, 4096
ret i32 %sub
}
@@ -365,9 +448,7 @@ entry:
define i16 @SUB_SHORT_I(i16 %x) {
; CHECK-LABEL: SUB_SHORT_I:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movih32 a1, 65535
-; CHECK-NEXT: ori32 a1, a1, 65535
-; CHECK-NEXT: addu16 a0, a1
+; CHECK-NEXT: subi16 a0, a0, 1
; CHECK-NEXT: rts16
;
; GENERIC-LABEL: SUB_SHORT_I:
@@ -375,14 +456,7 @@ define i16 @SUB_SHORT_I(i16 %x) {
; GENERIC-NEXT: .cfi_def_cfa_offset 0
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 4
-; GENERIC-NEXT: movi16 a1, 255
-; GENERIC-NEXT: lsli16 a2, a1, 24
-; GENERIC-NEXT: lsli16 a3, a1, 16
-; GENERIC-NEXT: or16 a3, a2
-; GENERIC-NEXT: lsli16 a2, a1, 8
-; GENERIC-NEXT: or16 a2, a3
-; GENERIC-NEXT: or16 a2, a1
-; GENERIC-NEXT: addu16 a0, a0, a2
+; GENERIC-NEXT: subi16 a0, 1
; GENERIC-NEXT: addi16 sp, sp, 4
; GENERIC-NEXT: rts16
entry:
@@ -412,9 +486,7 @@ entry:
define i8 @SUB_CHAR_I(i8 %x) {
; CHECK-LABEL: SUB_CHAR_I:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movih32 a1, 65535
-; CHECK-NEXT: ori32 a1, a1, 65535
-; CHECK-NEXT: addu16 a0, a1
+; CHECK-NEXT: subi16 a0, a0, 1
; CHECK-NEXT: rts16
;
; GENERIC-LABEL: SUB_CHAR_I:
@@ -422,14 +494,7 @@ define i8 @SUB_CHAR_I(i8 %x) {
; GENERIC-NEXT: .cfi_def_cfa_offset 0
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 4
-; GENERIC-NEXT: movi16 a1, 255
-; GENERIC-NEXT: lsli16 a2, a1, 24
-; GENERIC-NEXT: lsli16 a3, a1, 16
-; GENERIC-NEXT: or16 a3, a2
-; GENERIC-NEXT: lsli16 a2, a1, 8
-; GENERIC-NEXT: or16 a2, a3
-; GENERIC-NEXT: or16 a2, a1
-; GENERIC-NEXT: addu16 a0, a0, a2
+; GENERIC-NEXT: subi16 a0, 1
; GENERIC-NEXT: addi16 sp, sp, 4
; GENERIC-NEXT: rts16
entry:
@@ -595,7 +660,7 @@ define i32 @udivRR(i32 %x, i32 %y) {
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: mov16 a2, a0
-; GENERIC-NEXT: lrw32 a3, [.LCPI25_0]
+; GENERIC-NEXT: lrw32 a3, [.LCPI29_0]
; GENERIC-NEXT: mov16 a0, a1
; GENERIC-NEXT: mov16 a1, a2
; GENERIC-NEXT: jsr16 a3
@@ -606,7 +671,7 @@ define i32 @udivRR(i32 %x, i32 %y) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI25_0:
+; GENERIC-NEXT: .LCPI29_0:
; GENERIC-NEXT: .long __udivsi3
entry:
%udiv = udiv i32 %y, %x
@@ -628,7 +693,7 @@ define i32 @udivRI(i32 %x) {
; GENERIC-NEXT: .cfi_offset lr, -4
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 8
-; GENERIC-NEXT: lrw32 a2, [.LCPI26_0]
+; GENERIC-NEXT: lrw32 a2, [.LCPI30_0]
; GENERIC-NEXT: movi16 a1, 10
; GENERIC-NEXT: jsr16 a2
; GENERIC-NEXT: addi16 sp, sp, 4
@@ -638,7 +703,7 @@ define i32 @udivRI(i32 %x) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI26_0:
+; GENERIC-NEXT: .LCPI30_0:
; GENERIC-NEXT: .long __udivsi3
entry:
%udiv = udiv i32 %x, 10
@@ -669,7 +734,7 @@ define i32 @udivRI_X(i32 %x) {
; GENERIC-NEXT: or16 a2, a1
; GENERIC-NEXT: movi16 a1, 1
; GENERIC-NEXT: or16 a1, a2
-; GENERIC-NEXT: lrw32 a2, [.LCPI27_0]
+; GENERIC-NEXT: lrw32 a2, [.LCPI31_0]
; GENERIC-NEXT: jsr16 a2
; GENERIC-NEXT: addi16 sp, sp, 4
; GENERIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload
@@ -678,7 +743,7 @@ define i32 @udivRI_X(i32 %x) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI27_0:
+; GENERIC-NEXT: .LCPI31_0:
; GENERIC-NEXT: .long __udivsi3
entry:
%udiv = udiv i32 %x, 4097
@@ -714,7 +779,7 @@ define i16 @UDIV_SHORT(i16 %x, i16 %y) {
; GENERIC-NEXT: or16 a1, a3
; GENERIC-NEXT: and16 a2, a1
; GENERIC-NEXT: and16 a1, a0
-; GENERIC-NEXT: lrw32 a3, [.LCPI28_0]
+; GENERIC-NEXT: lrw32 a3, [.LCPI32_0]
; GENERIC-NEXT: mov16 a0, a2
; GENERIC-NEXT: jsr16 a3
; GENERIC-NEXT: addi16 sp, sp, 4
@@ -725,7 +790,7 @@ define i16 @UDIV_SHORT(i16 %x, i16 %y) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI28_0:
+; GENERIC-NEXT: .LCPI32_0:
; GENERIC-NEXT: .long __udivsi3
entry:
%udiv = udiv i16 %y, %x
@@ -789,7 +854,7 @@ define i8 @UDIV_CHAR(i8 %x, i8 %y) {
; GENERIC-NEXT: movi16 a1, 255
; GENERIC-NEXT: and16 a2, a1
; GENERIC-NEXT: and16 a1, a0
-; GENERIC-NEXT: lrw32 a3, [.LCPI30_0]
+; GENERIC-NEXT: lrw32 a3, [.LCPI34_0]
; GENERIC-NEXT: mov16 a0, a2
; GENERIC-NEXT: jsr16 a3
; GENERIC-NEXT: addi16 sp, sp, 4
@@ -799,7 +864,7 @@ define i8 @UDIV_CHAR(i8 %x, i8 %y) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI30_0:
+; GENERIC-NEXT: .LCPI34_0:
; GENERIC-NEXT: .long __udivsi3
entry:
%udiv = udiv i8 %y, %x
@@ -847,7 +912,7 @@ define i32 @sdivRR(i32 %x, i32 %y) {
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: mov16 a2, a0
-; GENERIC-NEXT: lrw32 a3, [.LCPI32_0]
+; GENERIC-NEXT: lrw32 a3, [.LCPI36_0]
; GENERIC-NEXT: mov16 a0, a1
; GENERIC-NEXT: mov16 a1, a2
; GENERIC-NEXT: jsr16 a3
@@ -858,7 +923,7 @@ define i32 @sdivRR(i32 %x, i32 %y) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI32_0:
+; GENERIC-NEXT: .LCPI36_0:
; GENERIC-NEXT: .long __divsi3
entry:
%sdiv = sdiv i32 %y, %x
@@ -880,7 +945,7 @@ define i32 @sdivRI(i32 %x) {
; GENERIC-NEXT: .cfi_offset lr, -4
; GENERIC-NEXT: subi16 sp, sp, 4
; GENERIC-NEXT: .cfi_def_cfa_offset 8
-; GENERIC-NEXT: lrw32 a2, [.LCPI33_0]
+; GENERIC-NEXT: lrw32 a2, [.LCPI37_0]
; GENERIC-NEXT: movi16 a1, 10
; GENERIC-NEXT: jsr16 a2
; GENERIC-NEXT: addi16 sp, sp, 4
@@ -890,7 +955,7 @@ define i32 @sdivRI(i32 %x) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI33_0:
+; GENERIC-NEXT: .LCPI37_0:
; GENERIC-NEXT: .long __divsi3
entry:
%sdiv = sdiv i32 %x, 10
@@ -921,7 +986,7 @@ define i32 @sdivRI_X(i32 %x) {
; GENERIC-NEXT: or16 a2, a1
; GENERIC-NEXT: movi16 a1, 1
; GENERIC-NEXT: or16 a1, a2
-; GENERIC-NEXT: lrw32 a2, [.LCPI34_0]
+; GENERIC-NEXT: lrw32 a2, [.LCPI38_0]
; GENERIC-NEXT: jsr16 a2
; GENERIC-NEXT: addi16 sp, sp, 4
; GENERIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload
@@ -930,7 +995,7 @@ define i32 @sdivRI_X(i32 %x) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI34_0:
+; GENERIC-NEXT: .LCPI38_0:
; GENERIC-NEXT: .long __divsi3
entry:
%sdiv = sdiv i32 %x, 4097
@@ -955,7 +1020,7 @@ define i16 @SDIV_SHORT(i16 %x, i16 %y) {
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: sexth16 a2, a1
; GENERIC-NEXT: sexth16 a1, a0
-; GENERIC-NEXT: lrw32 a3, [.LCPI35_0]
+; GENERIC-NEXT: lrw32 a3, [.LCPI39_0]
; GENERIC-NEXT: mov16 a0, a2
; GENERIC-NEXT: jsr16 a3
; GENERIC-NEXT: addi16 sp, sp, 4
@@ -965,7 +1030,7 @@ define i16 @SDIV_SHORT(i16 %x, i16 %y) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI35_0:
+; GENERIC-NEXT: .LCPI39_0:
; GENERIC-NEXT: .long __divsi3
entry:
%sdiv = sdiv i16 %y, %x
@@ -1027,7 +1092,7 @@ define i8 @SDIV_CHAR(i8 %x, i8 %y) {
; GENERIC-NEXT: .cfi_def_cfa_offset 8
; GENERIC-NEXT: sextb16 a2, a1
; GENERIC-NEXT: sextb16 a1, a0
-; GENERIC-NEXT: lrw32 a3, [.LCPI37_0]
+; GENERIC-NEXT: lrw32 a3, [.LCPI41_0]
; GENERIC-NEXT: mov16 a0, a2
; GENERIC-NEXT: jsr16 a3
; GENERIC-NEXT: addi16 sp, sp, 4
@@ -1037,7 +1102,7 @@ define i8 @SDIV_CHAR(i8 %x, i8 %y) {
; GENERIC-NEXT: .p2align 1
; GENERIC-NEXT: # %bb.1:
; GENERIC-NEXT: .p2align 2, 0x0
-; GENERIC-NEXT: .LCPI37_0:
+; GENERIC-NEXT: .LCPI41_0:
; GENERIC-NEXT: .long __divsi3
entry:
%sdiv = sdiv i8 %y, %x
More information about the llvm-commits mailing list