[llvm] db0286a - [AArch64] Enhance 'isBitfieldPositioningOp' to find pattern shl(and(val, mask), N).
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 17 09:02:21 PDT 2022
Author: Mingming Liu
Date: 2022-10-17T09:01:29-07:00
New Revision: db0286a09626a3a29ceacbe95a1a05004f62242c
URL: https://github.com/llvm/llvm-project/commit/db0286a09626a3a29ceacbe95a1a05004f62242c
DIFF: https://github.com/llvm/llvm-project/commit/db0286a09626a3a29ceacbe95a1a05004f62242c.diff
LOG: [AArch64] Enhance 'isBitfieldPositioningOp' to find pattern shl(and(val, mask), N).
Before this patch (and D135844)
- Given a DAG node shl(op, N), isBitfieldPositioningOp uses the (optionally shifted [1]) op as Src; the least significant bits of Src are inserted at DstLSB of the Dst node.
After this patch
- If op is and(val, mask), isBitfieldPositioningOp tries to see through the 'and' and determine whether val is a simpler source than op.
This helps in a way similar (probably symmetric) to how isSeveralBitsExtractOpFromShr [2] optimizes isBitfieldExtractOpFromShr.
Existing test cases are improved without regressions.
[1] https://github.com/llvm/llvm-project/blob/cbd8464595220b5ea76c70ac9965d84970c4b712/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp#L2546
[2] https://github.com/llvm/llvm-project/blob/cbd8464595220b5ea76c70ac9965d84970c4b712/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp#L2057
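To make the new check concrete, here is a small standalone C++ sketch (illustrative only, not part of the patch) that mirrors the arithmetic in the new isSeveralBitsPositioningOpFromShl helper: shift the AND immediate left by the shl amount and back right, and accept the pattern only when the surviving bits form a contiguous trailing mask. The helper names (matchesUbfiz, trailingOnes, isMask64) are stand-ins for the LLVM utilities used by the patch and are assumptions of this sketch.

#include <cassert>
#include <cstdint>

// Stand-in for llvm::countTrailingOnes: number of trailing one bits.
static int trailingOnes(uint64_t V) {
  int N = 0;
  while (V & 1) {
    ++N;
    V >>= 1;
  }
  return N;
}

// Stand-in for llvm::isMask_64: true if V is a non-empty run of ones
// starting at bit 0 (e.g. 0x1, 0x7, 0xFF).
static bool isMask64(uint64_t V) {
  return V != 0 && ((V + 1) & V) == 0;
}

// Mirrors the mask check in isSeveralBitsPositioningOpFromShl: for
// (shl (and val, AndImm), ShlImm), the AND can be folded into a UBFIZ when
// the bits of AndImm that survive the left shift form a trailing mask.
static bool matchesUbfiz(uint64_t AndImm, uint64_t ShlImm, int &DstLSB,
                         int &Width) {
  assert(ShlImm < 64 && "shift amount must be a valid 64-bit shift");
  // Drop the AndImm bits that the left shift would discard anyway.
  uint64_t ShiftedAndImm = (AndImm << ShlImm) >> ShlImm;
  if (!isMask64(ShiftedAndImm))
    return false;
  Width = trailingOnes(ShiftedAndImm); // bits taken from val
  DstLSB = static_cast<int>(ShlImm);   // destination bit where they land
  return true;
}

int main() {
  int DstLSB = 0, Width = 0;

  // From the updated tests: (idx & 0x7) << 1 maps to a single bfi/ubfiz with
  // lsb=1 and width=3, so the separate 'and' instruction disappears.
  assert(matchesUbfiz(/*AndImm=*/0x7, /*ShlImm=*/1, DstLSB, Width));
  assert(DstLSB == 1 && Width == 3);

  // High bits of AndImm that the shift discards do not matter, mirroring the
  // "xyz11111" comment in the patch: bits 61..63 are shifted out here.
  assert(matchesUbfiz((0x7ULL << 61) | 0xF, /*ShlImm=*/3, DstLSB, Width));
  assert(DstLSB == 3 && Width == 4);

  // 0x6 does not leave a trailing mask after the shift, so it is rejected.
  assert(!matchesUbfiz(/*AndImm=*/0x6, /*ShlImm=*/1, DstLSB, Width));
  return 0;
}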
Differential Revision: https://reviews.llvm.org/D135850
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
llvm/test/CodeGen/AArch64/arm64-nvcast.ll
llvm/test/CodeGen/AArch64/pr58350.ll
llvm/test/CodeGen/AArch64/vec_uaddo.ll
llvm/test/CodeGen/AArch64/vec_umulo.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fd8e6b59e036..2ebdbb1e8453 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -442,6 +442,15 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
isIntImmediate(N->getOperand(1).getNode(), Imm);
}
+// isIntImmediateEq - This method tests to see if N is a constant operand that
+// is equivalent to 'ImmExpected'.
+static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
+ uint64_t Imm;
+ if (!isIntImmediate(N.getNode(), Imm))
+ return false;
+ return Imm == ImmExpected;
+}
+
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
switch(ConstraintID) {
@@ -2591,6 +2600,40 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
return true;
}
+// For node (shl (and val, mask), N), returns true if the node is equivalent to
+// UBFIZ.
+static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
+ SDValue &Src, int &DstLSB,
+ int &Width) {
+ // Caller should have verified that N is a left shift with constant shift
+ // amount; asserts that.
+ assert(Op.getOpcode() == ISD::SHL &&
+ "Op.getNode() should be a SHL node to call this function");
+ assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
+ "Op.getNode() should shift ShlImm to call this function");
+
+ uint64_t AndImm = 0;
+ SDValue Op0 = Op.getOperand(0);
+ if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
+ return false;
+
+ const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
+ if (isMask_64(ShiftedAndImm)) {
+ // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
+ // should end with Mask, and could be prefixed with random bits if those
+ // bits are shifted out.
+ //
+ // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
+ // the AND result corresponding to those bits are shifted out, so it's fine
+ // to not extract them.
+ Width = countTrailingOnes(ShiftedAndImm);
+ DstLSB = ShlImm;
+ Src = Op0.getOperand(0);
+ return true;
+ }
+ return false;
+}
+
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
bool BiggerPattern,
const uint64_t NonZeroBits,
@@ -2609,6 +2652,9 @@ static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
if (!BiggerPattern && !Op.hasOneUse())
return false;
+ if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
+ return true;
+
DstLSB = countTrailingZeros(NonZeroBits);
Width = countTrailingOnes(NonZeroBits >> DstLSB);
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 04940fbe9423..2bea1d22d52a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -9114,13 +9114,12 @@ define i16 @load_single_extract_variable_index_i16(<8 x i16>* %A, i32 %idx) {
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: and x8, x1, #0x7
+; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: bfi x9, x8, #1, #3
+; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: bfi x8, x1, #1, #3
; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldrh w0, [x9]
+; CHECK-NEXT: ldrh w0, [x8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%lv = load <8 x i16>, <8 x i16>* %A
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 9090fc1979fc..8f322ab1c362 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1222,12 +1222,11 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: and x8, x0, #0x7
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: bfi x9, x8, #1, #3
-; CHECK-NEXT: ldr h1, [x9]
+; CHECK-NEXT: bfi x8, x0, #1, #3
+; CHECK-NEXT: ldr h1, [x8]
; CHECK-NEXT: mov v1.h[1], v0.h[1]
; CHECK-NEXT: mov v1.h[2], v0.h[2]
; CHECK-NEXT: mov v1.h[3], v0.h[3]
@@ -1250,11 +1249,10 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: add x8, sp, #8
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: and x8, x0, #0x3
-; CHECK-NEXT: add x9, sp, #8
-; CHECK-NEXT: bfi x9, x8, #1, #2
-; CHECK-NEXT: str h0, [x9]
+; CHECK-NEXT: bfi x8, x0, #1, #2
+; CHECK-NEXT: str h0, [x8]
; CHECK-NEXT: ldr d1, [sp, #8]
; CHECK-NEXT: mov v1.h[1], v0.h[1]
; CHECK-NEXT: mov v1.h[2], v0.h[2]
diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
index eb8f6f5c754d..c87cb5bd6a80 100644
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -6,13 +6,12 @@ define void @test(float * %p1, i32 %v1) {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: and x8, x1, #0x3
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: fmov.2d v0, #2.00000000
-; CHECK-NEXT: bfi x9, x8, #2, #2
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: bfi x8, x1, #2, #2
; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldr s0, [x9]
+; CHECK-NEXT: ldr s0, [x8]
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
@@ -27,13 +26,12 @@ define void @test2(float * %p1, i32 %v1) {
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: and x8, x1, #0x3
-; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: movi.16b v0, #63
-; CHECK-NEXT: bfi x9, x8, #2, #2
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: bfi x8, x1, #2, #2
; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldr s0, [x9]
+; CHECK-NEXT: ldr s0, [x8]
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll
index eaabb3d0cd40..efbdfb168502 100644
--- a/llvm/test/CodeGen/AArch64/pr58350.ll
+++ b/llvm/test/CodeGen/AArch64/pr58350.ll
@@ -11,13 +11,12 @@ define void @f(<1 x float> %a, i64 %b) {
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: and x9, x0, #0x1
-; CHECK-NEXT: mov x10, sp
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: bfi x9, x0, #2, #1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: bfi x10, x9, #2, #1
; CHECK-NEXT: str d1, [sp]
-; CHECK-NEXT: ldr s1, [x10]
+; CHECK-NEXT: ldr s1, [x9]
; CHECK-NEXT: mov v1.s[1], v0.s[0]
; CHECK-NEXT: str d1, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
index f192fae2ce92..eab5c8abd020 100644
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -250,19 +250,17 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[2]
-; CHECK-NEXT: umov w10, v0.h[0]
+; CHECK-NEXT: umov w9, v0.h[0]
+; CHECK-NEXT: umov w10, v0.h[2]
; CHECK-NEXT: umov w11, v0.h[3]
; CHECK-NEXT: and v1.8b, v0.8b, v2.8b
; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: and w8, w8, #0x1
-; CHECK-NEXT: and w9, w9, #0x1
+; CHECK-NEXT: bfi w9, w8, #1, #1
+; CHECK-NEXT: bfi w9, w10, #2, #1
; CHECK-NEXT: mvn v0.8b, v0.8b
+; CHECK-NEXT: bfi w9, w11, #3, #29
+; CHECK-NEXT: and w8, w9, #0xf
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: bfi w10, w8, #1, #1
-; CHECK-NEXT: bfi w10, w9, #2, #1
-; CHECK-NEXT: bfi w10, w11, #3, #29
-; CHECK-NEXT: and w8, w10, #0xf
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
%t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
index 2bb7f7fcdd3e..0fccb574644f 100644
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -300,15 +300,13 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
; CHECK-NEXT: umov w8, v1.h[1]
-; CHECK-NEXT: umov w9, v1.h[2]
-; CHECK-NEXT: umov w10, v1.h[0]
+; CHECK-NEXT: umov w9, v1.h[0]
+; CHECK-NEXT: umov w10, v1.h[2]
; CHECK-NEXT: umov w11, v1.h[3]
-; CHECK-NEXT: and w8, w8, #0x1
-; CHECK-NEXT: and w9, w9, #0x1
-; CHECK-NEXT: bfi w10, w8, #1, #1
-; CHECK-NEXT: bfi w10, w9, #2, #1
-; CHECK-NEXT: bfi w10, w11, #3, #29
-; CHECK-NEXT: and w8, w10, #0xf
+; CHECK-NEXT: bfi w9, w8, #1, #1
+; CHECK-NEXT: bfi w9, w10, #2, #1
+; CHECK-NEXT: bfi w9, w11, #3, #29
+; CHECK-NEXT: and w8, w9, #0xf
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
%t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)