[llvm] abcbca2 - [AArch64] Fix big endian shuffle vector miscompile (#68673)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 20 02:24:25 PST 2023
Author: hstk30-hw
Date: 2023-11-20T10:24:20Z
New Revision: abcbca21cc2e8a2b256cd519df2b1559f29e8edd
URL: https://github.com/llvm/llvm-project/commit/abcbca21cc2e8a2b256cd519df2b1559f29e8edd
DIFF: https://github.com/llvm/llvm-project/commit/abcbca21cc2e8a2b256cd519df2b1559f29e8edd.diff
LOG: [AArch64] Fix big endian shuffle vector miscompile (#68673)
Fixes https://github.com/llvm/llvm-project/issues/65884
Added:
llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
llvm/test/CodeGen/AArch64/neon-bitcast.ll
llvm/test/CodeGen/AArch64/zext-to-tbl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 47c57edbd16ccd3..d42ae4ff93a4442 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11050,7 +11050,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
- Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ if (DAG.getDataLayout().isBigEndian()) {
+ Src.ShuffleVec =
+ DAG.getNode(AArch64ISD::NVCAST, dl, ShuffleVT, Src.ShuffleVec);
+ } else {
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ }
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
@@ -11102,7 +11107,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
- SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ SDValue V;
+ if (DAG.getDataLayout().isBigEndian()) {
+ V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle);
+ } else {
+ V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ }
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index cc7dffc497495a0..945a73b05f1baee 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -140,7 +140,6 @@ define <3 x i32> @fsext_v3i32(ptr %a) {
; CHECK-BE-NEXT: ldr s0, [x0]
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
-; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
@@ -284,7 +283,6 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
; CHECK-BE-NEXT: ldr s0, [x0]
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
-; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
@@ -447,7 +445,7 @@ define <4 x i8> @bitcast(i32 %0) {
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
-; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
; CHECK-BE-NEXT: ret
%2 = bitcast i32 %0 to <4 x i8>
ret <4 x i8> %2
diff --git a/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll b/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
index b2cb38c72bae832..d774d71d88f309b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
@@ -270,8 +270,6 @@ define i8 @trunc_v4i64_v4i8(<4 x i64> %input) {
; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-BE-NEXT: xtn v1.2s, v1.2d
; CHECK-BE-NEXT: xtn v0.2s, v0.2d
-; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
-; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-BE-NEXT: addv h0, v0.4h
; CHECK-BE-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
new file mode 100644
index 000000000000000..8b74de1c127dd43
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefix=CHECKLE
+; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s --check-prefix=CHECKBE
+
+define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
+; CHECKLE-LABEL: test_reconstructshuffle:
+; CHECKLE: // %bb.0:
+; CHECKLE-NEXT: umov w8, v0.b[3]
+; CHECKLE-NEXT: umov w9, v0.b[2]
+; CHECKLE-NEXT: fmov s2, w8
+; CHECKLE-NEXT: umov w8, v0.b[1]
+; CHECKLE-NEXT: mov v2.h[1], w9
+; CHECKLE-NEXT: mov v2.h[2], w8
+; CHECKLE-NEXT: umov w8, v0.b[0]
+; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
+; CHECKLE-NEXT: mov v2.h[3], w8
+; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h
+; CHECKLE-NEXT: bic v0.4h, #255, lsl #8
+; CHECKLE-NEXT: ret
+;
+; CHECKBE-LABEL: test_reconstructshuffle:
+; CHECKBE: // %bb.0:
+; CHECKBE-NEXT: rev64 v0.16b, v0.16b
+; CHECKBE-NEXT: rev64 v1.16b, v1.16b
+; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: umov w8, v0.b[3]
+; CHECKBE-NEXT: umov w9, v0.b[2]
+; CHECKBE-NEXT: fmov s2, w8
+; CHECKBE-NEXT: umov w8, v0.b[1]
+; CHECKBE-NEXT: mov v2.h[1], w9
+; CHECKBE-NEXT: mov v2.h[2], w8
+; CHECKBE-NEXT: umov w8, v0.b[0]
+; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: mov v2.h[3], w8
+; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h
+; CHECKBE-NEXT: bic v0.4h, #255, lsl #8
+; CHECKBE-NEXT: rev64 v0.4h, v0.4h
+; CHECKBE-NEXT: ret
+ %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %tmp2 = shufflevector <16 x i8> %b, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ %tmp3 = add <4 x i8> %tmp1, %tmp2
+ %tmp4 = zext <4 x i8> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
+}
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index bfd59f3d813c85e..d06612e2332e6ee 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -545,7 +545,7 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) {
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
-; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
; CHECK-BE-NEXT: ret
%ret = bitcast i32 %word to <4 x i8>
ret <4 x i8> %ret
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index cf3955be99b4f10..0a3476e5f4cef63 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1345,10 +1345,6 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b
; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b
; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b
-; CHECK-BE-NEXT: rev16 v3.8b, v3.8b
-; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
-; CHECK-BE-NEXT: rev16 v4.8b, v4.8b
-; CHECK-BE-NEXT: rev16 v2.8b, v2.8b
; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b
More information about the llvm-commits mailing list