[llvm-branch-commits] [llvm] [AArch64][DAG] Copy flags when narrowExtractedVectorBinOp-ing (PR #193446)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Apr 22 02:13:24 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
The `extract (binop B0, B1), N` fold above already copies the node flags; do the same for the `extract (binop (concat X1, X2), Y), N` version. In this case it helps preserve the `disjoint` flag on `or` nodes.
---
Full diff: https://github.com/llvm/llvm-project/pull/193446.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+2-1)
- (modified) llvm/test/CodeGen/AArch64/arm64-vmul.ll (+26-26)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 53953a776fdc1..78bb236dea06b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26989,7 +26989,8 @@ static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
: DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), IndexC);
- SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ SDValue NarrowBinOp =
+ DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
return DAG.getBitcast(VT, NarrowBinOp);
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 83ec287f16363..fbf6df2b1fda4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -3630,10 +3630,10 @@ define <2 x i64> @sqdmlsl2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32>
define <16 x i16> @or_sext_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
; CHECK-SD-LABEL: or_sext_v16i8_i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull2 v4.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: smlal2 v3.8h, v0.16b, v1.16b
; CHECK-SD-NEXT: smlal v2.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_sext_v16i8_i16:
@@ -3654,10 +3654,10 @@ entry:
define <16 x i16> @or_zext_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
; CHECK-SD-LABEL: or_zext_v16i8_i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull2 v4.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: umlal2 v3.8h, v0.16b, v1.16b
; CHECK-SD-NEXT: umlal v2.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_zext_v16i8_i16:
@@ -3678,11 +3678,11 @@ entry:
define <16 x i16> @or_sext_idx_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
; CHECK-SD-LABEL: or_sext_idx_v16i8_i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: dup v4.16b, v0.b[3]
+; CHECK-SD-NEXT: dup v0.16b, v0.b[3]
+; CHECK-SD-NEXT: smlal2 v3.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: smlal v2.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: smull2 v5.8h, v4.16b, v1.16b
-; CHECK-SD-NEXT: smlal v0.8h, v4.8b, v1.8b
-; CHECK-SD-NEXT: orr v1.16b, v5.16b, v3.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_sext_idx_v16i8_i16:
@@ -3706,11 +3706,11 @@ entry:
define <16 x i16> @or_zext_idx_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
; CHECK-SD-LABEL: or_zext_idx_v16i8_i16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: dup v4.16b, v0.b[3]
+; CHECK-SD-NEXT: dup v0.16b, v0.b[3]
+; CHECK-SD-NEXT: umlal2 v3.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: umlal v2.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: umull2 v5.8h, v4.16b, v1.16b
-; CHECK-SD-NEXT: umlal v0.8h, v4.8b, v1.8b
-; CHECK-SD-NEXT: orr v1.16b, v5.16b, v3.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_zext_idx_v16i8_i16:
@@ -3761,10 +3761,10 @@ entry:
define <8 x i32> @or_zext_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1, <8 x i32> %b) {
; CHECK-SD-LABEL: or_zext_v8i16_i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull2 v4.4s, v0.8h, v1.8h
+; CHECK-SD-NEXT: umlal2 v3.4s, v0.8h, v1.8h
; CHECK-SD-NEXT: umlal v2.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_zext_v8i16_i32:
@@ -3785,10 +3785,10 @@ entry:
define <8 x i32> @or_sext_idx_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1, <8 x i32> %b) {
; CHECK-SD-LABEL: or_sext_idx_v8i16_i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull2 v4.4s, v1.8h, v0.h[3]
+; CHECK-SD-NEXT: smlal2 v3.4s, v1.8h, v0.h[3]
; CHECK-SD-NEXT: smlal v2.4s, v1.4h, v0.h[3]
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_sext_idx_v8i16_i32:
@@ -3811,10 +3811,10 @@ entry:
define <8 x i32> @or_zext_idx_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1, <8 x i32> %b) {
; CHECK-SD-LABEL: or_zext_idx_v8i16_i32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull2 v4.4s, v1.8h, v0.h[3]
+; CHECK-SD-NEXT: umlal2 v3.4s, v1.8h, v0.h[3]
; CHECK-SD-NEXT: umlal v2.4s, v1.4h, v0.h[3]
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_zext_idx_v8i16_i32:
@@ -3837,10 +3837,10 @@ entry:
define <4 x i64> @or_sext_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
; CHECK-SD-LABEL: or_sext_v4i32_i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull2 v4.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT: smlal2 v3.2d, v0.4s, v1.4s
; CHECK-SD-NEXT: smlal v2.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_sext_v4i32_i64:
@@ -3861,10 +3861,10 @@ entry:
define <4 x i64> @or_zext_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
; CHECK-SD-LABEL: or_zext_v4i32_i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull2 v4.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT: umlal2 v3.2d, v0.4s, v1.4s
; CHECK-SD-NEXT: umlal v2.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_zext_v4i32_i64:
@@ -3885,10 +3885,10 @@ entry:
define <4 x i64> @or_sext_idx_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
; CHECK-SD-LABEL: or_sext_idx_v4i32_i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: smull2 v4.2d, v1.4s, v0.s[3]
+; CHECK-SD-NEXT: smlal2 v3.2d, v1.4s, v0.s[3]
; CHECK-SD-NEXT: smlal v2.2d, v1.2s, v0.s[3]
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_sext_idx_v4i32_i64:
@@ -3911,10 +3911,10 @@ entry:
define <4 x i64> @or_zext_idx_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
; CHECK-SD-LABEL: or_zext_idx_v4i32_i64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umull2 v4.2d, v1.4s, v0.s[3]
+; CHECK-SD-NEXT: umlal2 v3.2d, v1.4s, v0.s[3]
; CHECK-SD-NEXT: umlal v2.2d, v1.2s, v0.s[3]
-; CHECK-SD-NEXT: orr v1.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_zext_idx_v4i32_i64:
``````````
</details>
https://github.com/llvm/llvm-project/pull/193446
More information about the llvm-branch-commits
mailing list