[llvm-branch-commits] [llvm] release/22.x: [AArch64] Fix `shufflevector` miscompilation on `aarch64_be` (#193076) (PR #193744)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Apr 23 06:41:39 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Folkert de Vries (folkertdev)

<details>
<summary>Changes</summary>

backport 7425ab9d95776777959767b6054024bd1725ae76 
pr: https://github.com/llvm/llvm-project/pull/193076

---
Full diff: https://github.com/llvm/llvm-project/pull/193744.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+27-27) 
- (added) llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll (+95) 
- (removed) llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll (-39) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a55b1facb103f..aa22f480c1537 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24419,33 +24419,6 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
   if (N->getOpcode() == AArch64ISD::UZP2)
     return SDValue();
 
-  // uzp1(x, undef) -> concat(truncate(x), undef)
-  if (Op1.isUndef()) {
-    EVT BCVT = MVT::Other, HalfVT = MVT::Other;
-    switch (ResVT.getSimpleVT().SimpleTy) {
-    default:
-      break;
-    case MVT::v16i8:
-      BCVT = MVT::v8i16;
-      HalfVT = MVT::v8i8;
-      break;
-    case MVT::v8i16:
-      BCVT = MVT::v4i32;
-      HalfVT = MVT::v4i16;
-      break;
-    case MVT::v4i32:
-      BCVT = MVT::v2i64;
-      HalfVT = MVT::v2i32;
-      break;
-    }
-    if (BCVT != MVT::Other) {
-      SDValue BC = DAG.getBitcast(BCVT, Op0);
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
-      return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
-                         DAG.getUNDEF(HalfVT));
-    }
-  }
-
   if (SDValue Urshr = tryCombineExtendRShTrunc(N, DAG))
     return Urshr;
 
@@ -24487,6 +24460,33 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
   if (!DAG.getDataLayout().isLittleEndian())
     return SDValue();
 
+  // uzp1(x, undef) -> concat(truncate(x), undef)
+  if (Op1.isUndef()) {
+    EVT BCVT = MVT::Other, HalfVT = MVT::Other;
+    switch (ResVT.getSimpleVT().SimpleTy) {
+    default:
+      break;
+    case MVT::v16i8:
+      BCVT = MVT::v8i16;
+      HalfVT = MVT::v8i8;
+      break;
+    case MVT::v8i16:
+      BCVT = MVT::v4i32;
+      HalfVT = MVT::v4i16;
+      break;
+    case MVT::v4i32:
+      BCVT = MVT::v2i64;
+      HalfVT = MVT::v2i32;
+      break;
+    }
+    if (BCVT != MVT::Other) {
+      SDValue BC = DAG.getBitcast(BCVT, Op0);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
+                         DAG.getPOISON(HalfVT));
+    }
+  }
+
   // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
   // Example:
   // nxv4i32 = uzp1 bitcast(nxv4i32 x to nxv2i64), bitcast(nxv4i32 y to nxv2i64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll b/llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll
new file mode 100644
index 0000000000000..4e60d99dbed36
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64_be-shuffle-vector.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
+; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE
+
+define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECKLE-LABEL: test_reconstructshuffle:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    mov b2, v0.b[3]
+; CHECKLE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKLE-NEXT:    mov v2.b[2], v0.b[2]
+; CHECKLE-NEXT:    mov v2.b[4], v0.b[1]
+; CHECKLE-NEXT:    mov v2.b[6], v0.b[0]
+; CHECKLE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
+; CHECKLE-NEXT:    add v0.4h, v2.4h, v0.4h
+; CHECKLE-NEXT:    bic v0.4h, #255, lsl #8
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: test_reconstructshuffle:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
+; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    mov b2, v0.b[3]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    mov v2.b[2], v0.b[2]
+; CHECKBE-NEXT:    mov v2.b[4], v0.b[1]
+; CHECKBE-NEXT:    mov v2.b[6], v0.b[0]
+; CHECKBE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
+; CHECKBE-NEXT:    add v0.4h, v2.4h, v0.4h
+; CHECKBE-NEXT:    bic v0.4h, #255, lsl #8
+; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
+; CHECKBE-NEXT:    ret
+  %tmp1 = shufflevector <16 x i8> %a, <16 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %tmp2 = shufflevector <16 x i8> %b, <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  %tmp3 = add <4 x i8> %tmp1, %tmp2
+  %tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
+  ret <4 x i16> %tmp4
+}
+
+; a shufflevector of even elements can become just xtn on little-endian, but not on big-endian.
+define <8 x i8> @xtn_shuffle_even_v16i8(<16 x i8> %a) {
+; CHECKLE-LABEL: xtn_shuffle_even_v16i8:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    xtn v0.8b, v0.8h
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: xtn_shuffle_even_v16i8:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    uzp1 v0.16b, v0.16b, v0.16b
+; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
+; CHECKBE-NEXT:    ret
+  %r = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  ret <8 x i8> %r
+}
+
+; a shufflevector of even elements can become just xtn on little-endian, but not on big-endian.
+define <4 x i16> @xtn_shuffle_even_v8i16(<8 x i16> %a) {
+; CHECKLE-LABEL: xtn_shuffle_even_v8i16:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    xtn v0.4h, v0.4s
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: xtn_shuffle_even_v8i16:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.8h, v0.8h
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    uzp1 v0.8h, v0.8h, v0.8h
+; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
+; CHECKBE-NEXT:    ret
+  %r = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  ret <4 x i16> %r
+}
+
+; a shufflevector of even elements can become just xtn on little-endian, but not on big-endian.
+define <4 x i32> @xtn_shuffle_uzp1_poison_v4i32(<4 x i32> %a) {
+; CHECKLE-LABEL: xtn_shuffle_uzp1_poison_v4i32:
+; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    xtn v0.2s, v0.2d
+; CHECKLE-NEXT:    ret
+;
+; CHECKBE-LABEL: xtn_shuffle_uzp1_poison_v4i32:
+; CHECKBE:       // %bb.0:
+; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
+; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ret
+  %r = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
+  ret <4 x i32> %r
+
+}
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
deleted file mode 100644
index 65da95e0163f4..0000000000000
--- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
-; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE
-
-define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
-; CHECKLE-LABEL: test_reconstructshuffle:
-; CHECKLE:       // %bb.0:
-; CHECKLE-NEXT:    mov b2, v0.b[3]
-; CHECKLE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKLE-NEXT:    mov v2.b[2], v0.b[2]
-; CHECKLE-NEXT:    mov v2.b[4], v0.b[1]
-; CHECKLE-NEXT:    mov v2.b[6], v0.b[0]
-; CHECKLE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
-; CHECKLE-NEXT:    add v0.4h, v2.4h, v0.4h
-; CHECKLE-NEXT:    bic v0.4h, #255, lsl #8
-; CHECKLE-NEXT:    ret
-;
-; CHECKBE-LABEL: test_reconstructshuffle:
-; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
-; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    mov b2, v0.b[3]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    mov v2.b[2], v0.b[2]
-; CHECKBE-NEXT:    mov v2.b[4], v0.b[1]
-; CHECKBE-NEXT:    mov v2.b[6], v0.b[0]
-; CHECKBE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
-; CHECKBE-NEXT:    add v0.4h, v2.4h, v0.4h
-; CHECKBE-NEXT:    bic v0.4h, #255, lsl #8
-; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
-; CHECKBE-NEXT:    ret
-  %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %tmp2 = shufflevector <16 x i8> %b, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
-  %tmp3 = add <4 x i8> %tmp1, %tmp2
-  %tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
-  ret <4 x i16> %tmp4
-}

``````````

</details>


https://github.com/llvm/llvm-project/pull/193744


More information about the llvm-branch-commits mailing list