[llvm] 317a1ad - [DAGCombiner] Fold *_EXTEND_INREG of one of CONCAT_VECTORS operands into *_EXTEND of operand
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 4 14:12:56 PST 2023
Author: Roman Lebedev
Date: 2023-01-05T01:12:31+03:00
New Revision: 317a1adfe4927c06eee606a81ac9b01fd6d6df93
URL: https://github.com/llvm/llvm-project/commit/317a1adfe4927c06eee606a81ac9b01fd6d6df93
DIFF: https://github.com/llvm/llvm-project/commit/317a1adfe4927c06eee606a81ac9b01fd6d6df93.diff
LOG: [DAGCombiner] Fold *_EXTEND_INREG of one of CONCAT_VECTORS operands into *_EXTEND of operand
This appears to be the root problematic pattern
behind the AArch64 regression in D140677.
X86 already performs this fold, and many more like it, as target-specific
combines, so this change does not have much of an impact.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll
llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d23a150ba7aa9..9180ee18c3ee9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13542,6 +13542,39 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue();
}
+static SDValue
+foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG,
+ bool LegalOperations) {
+ unsigned InregOpcode = N->getOpcode();
+ unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
+ // Purpose: fold N = *_EXTEND_VECTOR_INREG(CONCAT_VECTORS(X, ...)) into the corresponding *_EXTEND(X); returns an empty SDValue when the fold does not apply.
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT =
+ VT.changeVectorElementType(Src.getValueType().getVectorElementType());
+ // SrcVT is the result type VT with its element type replaced by the source's element type, i.e. the type X must have for the fold.
+ assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ "Expected EXTEND_VECTOR_INREG dag node in input!");
+ // NOTE(review): this assert runs after getOpcode_EXTEND() has already been called with InregOpcode above.
+ // Profitability check: our operand must be a one-use CONCAT_VECTORS.
+ // FIXME: one-use check may be overly restrictive
+ if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ // From here on, Src refers to the first CONCAT_VECTORS operand rather than to the concatenation itself.
+ // Profitability check: we must be extending exactly one of its operands; only operand 0 is examined, and it must have type SrcVT.
+ // FIXME: this is probably overly restrictive.
+ Src = Src.getOperand(0);
+ if (Src.getValueType() != SrcVT)
+ return SDValue();
+ // When LegalOperations is set, only create the new node if the target reports Opcode as legal for VT.
+ if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
+ return SDValue();
+ // Build the plain extend of the first CONCAT_VECTORS operand, using an SDLoc taken from N.
+ return DAG.getNode(Opcode, SDLoc(N), VT, Src);
+}
+
SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -13556,6 +13589,10 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
+ LegalOperations))
+ return R;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll b/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll
index 86df50d699ebb..36144d0c0036c 100644
--- a/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll
+++ b/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll
@@ -7,12 +7,11 @@
define void @zext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
; CHECK-LABEL: zext_of_concat:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldr d0, [x1]
-; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: movi.2d v2, #0000000000000000
-; CHECK-NEXT: add.2s v0, v1, v0
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: add.2s v0, v0, v1
; CHECK-NEXT: ldr q1, [x2]
-; CHECK-NEXT: zip1.4s v0, v0, v2
+; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: add.4s v0, v0, v1
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 65b80d16e1328..65b9cbe30c08a 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -993,61 +993,21 @@ exit:
ret void
}
-; CHECK-LABEL: lCPI11_0:
-; CHECK-NEXT: .byte 0 ; 0x0
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 1 ; 0x1
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 2 ; 0x2
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 3 ; 0x3
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-
-; CHECK-BE-LABEL: .LCPI11_0:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 0 // 0x0
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 1 // 0x1
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 2 // 0x2
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 3 // 0x3
-
define void @zext_v4i8_to_v4i32_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v4i8_to_v4i32_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: Lloh12:
-; CHECK-NEXT: adrp x9, lCPI11_0 at PAGE
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: Lloh13:
-; CHECK-NEXT: ldr q0, [x9, lCPI11_0 at PAGEOFF]
; CHECK-NEXT: LBB11_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr s1, [x0, x8]
+; CHECK-NEXT: ldr s0, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: tbl.16b v1, { v1 }, v0
-; CHECK-NEXT: str q1, [x1], #64
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: str q0, [x1], #64
; CHECK-NEXT: b.ne LBB11_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh13
;
; CHECK-BE-LABEL: zext_v4i8_to_v4i32_in_loop:
; CHECK-BE: // %bb.0: // %entry
@@ -1194,18 +1154,18 @@ exit:
define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v12i8_to_v12i32_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: Lloh14:
+; CHECK-NEXT: Lloh12:
; CHECK-NEXT: adrp x9, lCPI12_0 at PAGE
-; CHECK-NEXT: Lloh15:
+; CHECK-NEXT: Lloh13:
; CHECK-NEXT: adrp x10, lCPI12_1 at PAGE
-; CHECK-NEXT: Lloh16:
+; CHECK-NEXT: Lloh14:
; CHECK-NEXT: adrp x11, lCPI12_2 at PAGE
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: Lloh17:
+; CHECK-NEXT: Lloh15:
; CHECK-NEXT: ldr q0, [x9, lCPI12_0 at PAGEOFF]
-; CHECK-NEXT: Lloh18:
+; CHECK-NEXT: Lloh16:
; CHECK-NEXT: ldr q1, [x10, lCPI12_1 at PAGEOFF]
-; CHECK-NEXT: Lloh19:
+; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr q2, [x11, lCPI12_2 at PAGEOFF]
; CHECK-NEXT: LBB12_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1220,9 +1180,9 @@ define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: b.ne LBB12_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh19
-; CHECK-NEXT: .loh AdrpLdr Lloh15, Lloh18
; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17
+; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh16
+; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh15
;
; CHECK-BE-LABEL: zext_v12i8_to_v12i32_in_loop:
; CHECK-BE: // %bb.0: // %entry
@@ -2192,22 +2152,22 @@ exit:
define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v20i8_to_v20i24_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: Lloh20:
+; CHECK-NEXT: Lloh18:
; CHECK-NEXT: adrp x9, lCPI20_0 at PAGE
-; CHECK-NEXT: Lloh21:
+; CHECK-NEXT: Lloh19:
; CHECK-NEXT: adrp x10, lCPI20_1 at PAGE
-; CHECK-NEXT: Lloh22:
+; CHECK-NEXT: Lloh20:
; CHECK-NEXT: adrp x11, lCPI20_2 at PAGE
-; CHECK-NEXT: Lloh23:
+; CHECK-NEXT: Lloh21:
; CHECK-NEXT: adrp x12, lCPI20_3 at PAGE
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: Lloh24:
+; CHECK-NEXT: Lloh22:
; CHECK-NEXT: ldr q0, [x9, lCPI20_0 at PAGEOFF]
-; CHECK-NEXT: Lloh25:
+; CHECK-NEXT: Lloh23:
; CHECK-NEXT: ldr q1, [x10, lCPI20_1 at PAGEOFF]
-; CHECK-NEXT: Lloh26:
+; CHECK-NEXT: Lloh24:
; CHECK-NEXT: ldr q2, [x11, lCPI20_2 at PAGEOFF]
-; CHECK-NEXT: Lloh27:
+; CHECK-NEXT: Lloh25:
; CHECK-NEXT: ldr q3, [x12, lCPI20_3 at PAGEOFF]
; CHECK-NEXT: LBB20_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2228,10 +2188,10 @@ define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: b.ne LBB20_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27
-; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh26
; CHECK-NEXT: .loh AdrpLdr Lloh21, Lloh25
; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh24
+; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh23
+; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh22
;
; CHECK-BE-LABEL: zext_v20i8_to_v20i24_in_loop:
; CHECK-BE: // %bb.0: // %entry
@@ -2519,30 +2479,30 @@ exit:
define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v23i8_to_v23i48_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: Lloh28:
+; CHECK-NEXT: Lloh26:
; CHECK-NEXT: adrp x9, lCPI21_0 at PAGE
-; CHECK-NEXT: Lloh29:
+; CHECK-NEXT: Lloh27:
; CHECK-NEXT: adrp x10, lCPI21_1 at PAGE
-; CHECK-NEXT: Lloh30:
+; CHECK-NEXT: Lloh28:
; CHECK-NEXT: adrp x11, lCPI21_2 at PAGE
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: Lloh31:
+; CHECK-NEXT: Lloh29:
; CHECK-NEXT: ldr q0, [x9, lCPI21_0 at PAGEOFF]
-; CHECK-NEXT: Lloh32:
+; CHECK-NEXT: Lloh30:
; CHECK-NEXT: adrp x9, lCPI21_3 at PAGE
-; CHECK-NEXT: Lloh33:
+; CHECK-NEXT: Lloh31:
; CHECK-NEXT: ldr q1, [x10, lCPI21_1 at PAGEOFF]
-; CHECK-NEXT: Lloh34:
+; CHECK-NEXT: Lloh32:
; CHECK-NEXT: adrp x10, lCPI21_4 at PAGE
-; CHECK-NEXT: Lloh35:
+; CHECK-NEXT: Lloh33:
; CHECK-NEXT: ldr q2, [x11, lCPI21_2 at PAGEOFF]
-; CHECK-NEXT: Lloh36:
+; CHECK-NEXT: Lloh34:
; CHECK-NEXT: adrp x11, lCPI21_5 at PAGE
-; CHECK-NEXT: Lloh37:
+; CHECK-NEXT: Lloh35:
; CHECK-NEXT: ldr q3, [x9, lCPI21_3 at PAGEOFF]
-; CHECK-NEXT: Lloh38:
+; CHECK-NEXT: Lloh36:
; CHECK-NEXT: ldr q4, [x10, lCPI21_4 at PAGEOFF]
-; CHECK-NEXT: Lloh39:
+; CHECK-NEXT: Lloh37:
; CHECK-NEXT: ldr q5, [x11, lCPI21_5 at PAGEOFF]
; CHECK-NEXT: LBB21_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2570,15 +2530,15 @@ define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: b.ne LBB21_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh39
-; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh38
-; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh37
-; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh36
+; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh37
+; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh36
; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh35
-; CHECK-NEXT: .loh AdrpAdrp Lloh29, Lloh34
-; CHECK-NEXT: .loh AdrpLdr Lloh29, Lloh33
-; CHECK-NEXT: .loh AdrpAdrp Lloh28, Lloh32
-; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh31
+; CHECK-NEXT: .loh AdrpAdrp Lloh28, Lloh34
+; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh33
+; CHECK-NEXT: .loh AdrpAdrp Lloh27, Lloh32
+; CHECK-NEXT: .loh AdrpLdr Lloh27, Lloh31
+; CHECK-NEXT: .loh AdrpAdrp Lloh26, Lloh30
+; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh29
;
; CHECK-BE-LABEL: zext_v23i8_to_v23i48_in_loop:
; CHECK-BE: // %bb.0: // %entry
More information about the llvm-commits
mailing list