[llvm] 6a2442e - [AArch64] Increase AddedComplexity of BIC
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 6 13:31:44 PDT 2022
Author: Alexander Shaposhnikov
Date: 2022-09-06T20:31:24Z
New Revision: 6a2442e9be2529ee00b0239330df068b91c92aab
URL: https://github.com/llvm/llvm-project/commit/6a2442e9be2529ee00b0239330df068b91c92aab
DIFF: https://github.com/llvm/llvm-project/commit/6a2442e9be2529ee00b0239330df068b91c92aab.diff
LOG: [AArch64] Increase AddedComplexity of BIC
This diff adjusts AddedComplexity of BIC to bump its position
in the list of patterns to make LLVM pick it instead of MVN + AND.
MVN + AND requires 2 cycles, as does e.g. MOV + BIC, but the latter
outperforms the former when the instructions producing the operands of
BIC can be issued in parallel.
One may consider the following example:
ldur x15, [x0, #2] # 4 cycles
mvn x10, x15 # 1 cycle (depends on ldur)
and x9, x10, #0x8080808080808080 # 1 cycle (depends on mvn)
vs.
ldur x15, [x0, #2] # 4 cycles
mov x9, #0x8080808080808080 # 1 cycle (can be executed in parallel with ldur)
bic x9, x9, x15 # 1 cycle
Test plan: ninja check-all
Differential revision: https://reviews.llvm.org/D133345
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/addsub.ll
llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
llvm/test/CodeGen/AArch64/select_const.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f703bf36866c7..455b874e7e697 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -2994,8 +2994,8 @@ class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
// Split from LogicalImm as not all instructions have both.
multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
- SDPatternOperator OpNode> {
- let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ SDPatternOperator OpNode, int AddedComplexityVal = 0> {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = AddedComplexityVal in {
def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f6ec38a072793..a951bfafd8f31 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2097,7 +2097,7 @@ defm BICS : LogicalRegS<0b11, 1, "bics",
BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND : LogicalReg<0b00, 0, "and", and>;
defm BIC : LogicalReg<0b00, 1, "bic",
- BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
defm EON : LogicalReg<0b10, 1, "eon",
BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index dd36bf6e8d35b..ce92c88f0bff1 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -373,8 +373,8 @@ declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
; CHECK-LABEL: uadd_add:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: and w8, w8, #0xff
+; CHECK-NEXT: mov w8, #255
+; CHECK-NEXT: bic w8, w8, w0
; CHECK-NEXT: add w8, w8, w1, uxtb
; CHECK-NEXT: lsr w0, w8, #8
; CHECK-NEXT: add w8, w8, #1
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
index ee892ae9cf518..f826a80940468 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -5,10 +5,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.8b, v0.8b, #0
+; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: umaxv b0, v0.8b
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: mvn w8, w8
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <8 x i8> %a, zeroinitializer
%cast = bitcast <8 x i1> %cmp1 to i8
@@ -20,10 +20,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
+; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: umaxv b0, v0.16b
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: mvn w8, w8
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <16 x i8> %a, zeroinitializer
%cast = bitcast <16 x i1> %cmp1 to i16
@@ -35,12 +35,12 @@ define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
+; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: mvn w8, w8
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <32 x i8> %a, zeroinitializer
%cast = bitcast <32 x i1> %cmp1 to i32
@@ -52,6 +52,7 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) {
; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v2.16b, v2.16b, #0
+; CHECK-NEXT: mov w8, #1
; CHECK-NEXT: cmeq v3.16b, v3.16b, #0
; CHECK-NEXT: cmeq v1.16b, v1.16b, #0
; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
@@ -59,9 +60,8 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) {
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: mvn w8, w8
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: bic w0, w8, w9
; CHECK-NEXT: ret
%cmp1 = icmp eq <64 x i8> %a, zeroinitializer
%cast = bitcast <64 x i1> %cmp1 to i64
diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll
index 54065f69fdd1a..8da64191a268c 100644
--- a/llvm/test/CodeGen/AArch64/select_const.ll
+++ b/llvm/test/CodeGen/AArch64/select_const.ll
@@ -9,8 +9,8 @@
define i32 @select_0_or_1(i1 %cond) {
; CHECK-LABEL: select_0_or_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: bic w0, w8, w0
; CHECK-NEXT: ret
%sel = select i1 %cond, i32 0, i32 1
ret i32 %sel
@@ -28,8 +28,8 @@ define i32 @select_0_or_1_zeroext(i1 zeroext %cond) {
define i32 @select_0_or_1_signext(i1 signext %cond) {
; CHECK-LABEL: select_0_or_1_signext:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: bic w0, w8, w0
; CHECK-NEXT: ret
%sel = select i1 %cond, i32 0, i32 1
ret i32 %sel
More information about the llvm-commits
mailing list