[llvm-branch-commits] [llvm] [AArch64][DAG] Copy flags when narrowExtractedVectorBinOp-ing (PR #193446)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Apr 22 02:13:24 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

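The `extract (binop B0, B1), N` fold above already copies the node flags; do the same for the `extract (binop (concat X1, X2), Y), N` version. In this case it helps preserve the `disjoint` flag on `or` nodes, so in the updated tests the narrowed `or` is now folded into a multiply-accumulate (`smlal2`/`umlal2`) instead of being emitted as a separate `smull2`/`umull2` followed by `orr`.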

---
Full diff: https://github.com/llvm/llvm-project/pull/193446.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+2-1) 
- (modified) llvm/test/CodeGen/AArch64/arm64-vmul.ll (+26-26) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 53953a776fdc1..78bb236dea06b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26989,7 +26989,8 @@ static SDValue narrowExtractedVectorBinOp(EVT VT, SDValue Src, unsigned Index,
                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                       BinOp.getOperand(1), IndexC);
 
-    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+    SDValue NarrowBinOp =
+        DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
     return DAG.getBitcast(VT, NarrowBinOp);
   }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 83ec287f16363..fbf6df2b1fda4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -3630,10 +3630,10 @@ define <2 x i64> @sqdmlsl2_lane_2d_lib(<2 x i64> %dst, <4 x i32> %v1, <4 x i32>
 define <16 x i16> @or_sext_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
 ; CHECK-SD-LABEL: or_sext_v16i8_i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    smull2 v4.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT:    smlal2 v3.8h, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    smlal v2.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_sext_v16i8_i16:
@@ -3654,10 +3654,10 @@ entry:
 define <16 x i16> @or_zext_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
 ; CHECK-SD-LABEL: or_zext_v16i8_i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    umull2 v4.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT:    umlal2 v3.8h, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    umlal v2.8h, v0.8b, v1.8b
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_zext_v16i8_i16:
@@ -3678,11 +3678,11 @@ entry:
 define <16 x i16> @or_sext_idx_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
 ; CHECK-SD-LABEL: or_sext_idx_v16i8_i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    dup v4.16b, v0.b[3]
+; CHECK-SD-NEXT:    dup v0.16b, v0.b[3]
+; CHECK-SD-NEXT:    smlal2 v3.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT:    smlal v2.8h, v0.8b, v1.8b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    smull2 v5.8h, v4.16b, v1.16b
-; CHECK-SD-NEXT:    smlal v0.8h, v4.8b, v1.8b
-; CHECK-SD-NEXT:    orr v1.16b, v5.16b, v3.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_sext_idx_v16i8_i16:
@@ -3706,11 +3706,11 @@ entry:
 define <16 x i16> @or_zext_idx_v16i8_i16(<16 x i8> %s0, <16 x i8> %s1, <16 x i16> %b) {
 ; CHECK-SD-LABEL: or_zext_idx_v16i8_i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    dup v4.16b, v0.b[3]
+; CHECK-SD-NEXT:    dup v0.16b, v0.b[3]
+; CHECK-SD-NEXT:    umlal2 v3.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT:    umlal v2.8h, v0.8b, v1.8b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    umull2 v5.8h, v4.16b, v1.16b
-; CHECK-SD-NEXT:    umlal v0.8h, v4.8b, v1.8b
-; CHECK-SD-NEXT:    orr v1.16b, v5.16b, v3.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_zext_idx_v16i8_i16:
@@ -3761,10 +3761,10 @@ entry:
 define <8 x i32> @or_zext_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1, <8 x i32> %b) {
 ; CHECK-SD-LABEL: or_zext_v8i16_i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    umull2 v4.4s, v0.8h, v1.8h
+; CHECK-SD-NEXT:    umlal2 v3.4s, v0.8h, v1.8h
 ; CHECK-SD-NEXT:    umlal v2.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_zext_v8i16_i32:
@@ -3785,10 +3785,10 @@ entry:
 define <8 x i32> @or_sext_idx_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1, <8 x i32> %b) {
 ; CHECK-SD-LABEL: or_sext_idx_v8i16_i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    smull2 v4.4s, v1.8h, v0.h[3]
+; CHECK-SD-NEXT:    smlal2 v3.4s, v1.8h, v0.h[3]
 ; CHECK-SD-NEXT:    smlal v2.4s, v1.4h, v0.h[3]
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_sext_idx_v8i16_i32:
@@ -3811,10 +3811,10 @@ entry:
 define <8 x i32> @or_zext_idx_v8i16_i32(<8 x i16> %s0, <8 x i16> %s1, <8 x i32> %b) {
 ; CHECK-SD-LABEL: or_zext_idx_v8i16_i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    umull2 v4.4s, v1.8h, v0.h[3]
+; CHECK-SD-NEXT:    umlal2 v3.4s, v1.8h, v0.h[3]
 ; CHECK-SD-NEXT:    umlal v2.4s, v1.4h, v0.h[3]
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_zext_idx_v8i16_i32:
@@ -3837,10 +3837,10 @@ entry:
 define <4 x i64> @or_sext_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
 ; CHECK-SD-LABEL: or_sext_v4i32_i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    smull2 v4.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT:    smlal2 v3.2d, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    smlal v2.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_sext_v4i32_i64:
@@ -3861,10 +3861,10 @@ entry:
 define <4 x i64> @or_zext_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
 ; CHECK-SD-LABEL: or_zext_v4i32_i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    umull2 v4.2d, v0.4s, v1.4s
+; CHECK-SD-NEXT:    umlal2 v3.2d, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    umlal v2.2d, v0.2s, v1.2s
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_zext_v4i32_i64:
@@ -3885,10 +3885,10 @@ entry:
 define <4 x i64> @or_sext_idx_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
 ; CHECK-SD-LABEL: or_sext_idx_v4i32_i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    smull2 v4.2d, v1.4s, v0.s[3]
+; CHECK-SD-NEXT:    smlal2 v3.2d, v1.4s, v0.s[3]
 ; CHECK-SD-NEXT:    smlal v2.2d, v1.2s, v0.s[3]
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_sext_idx_v4i32_i64:
@@ -3911,10 +3911,10 @@ entry:
 define <4 x i64> @or_zext_idx_v4i32_i64(<4 x i32> %s0, <4 x i32> %s1, <4 x i64> %b) {
 ; CHECK-SD-LABEL: or_zext_idx_v4i32_i64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    umull2 v4.2d, v1.4s, v0.s[3]
+; CHECK-SD-NEXT:    umlal2 v3.2d, v1.4s, v0.s[3]
 ; CHECK-SD-NEXT:    umlal v2.2d, v1.2s, v0.s[3]
-; CHECK-SD-NEXT:    orr v1.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v3.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: or_zext_idx_v4i32_i64:

``````````

</details>


https://github.com/llvm/llvm-project/pull/193446


More information about the llvm-branch-commits mailing list