[llvm] feat: fix big endian shuffle vector miscompile (PR #68673)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 11 00:28:50 PDT 2023
https://github.com/hstk30-hw updated https://github.com/llvm/llvm-project/pull/68673
>From be4d279bb201f92a4322054949ee14f20997614c Mon Sep 17 00:00:00 2001
From: hstk30-hw <hanwei62 at huawei.com>
Date: Tue, 10 Oct 2023 16:31:55 +0800
Subject: [PATCH 1/2] feat: fix big endian shuffle vector miscompile
---
.../Target/AArch64/AArch64ISelLowering.cpp | 20 +++++++++++++++++++
llvm/test/CodeGen/AArch64/aarch64-load-ext.ll | 2 ++
.../AArch64/fix-shuffle-vector-be-rev.ll | 17 ++++++++++++++++
3 files changed, 39 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ac1ca04e15046b7..e3dd54a0b59a0da 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10914,6 +10914,26 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
+ if (DAG.getDataLayout().isBigEndian()) {
+ EVT SrcEltTy = ShuffleVT.getVectorElementType();
+ EVT DstEltTy = VT.getVectorElementType();
+ if (SrcEltTy != DstEltTy) {
+ unsigned REVOp = 0;
+ unsigned DstTypeSize = DstEltTy.getFixedSizeInBits();
+ unsigned SrcTypeSize = SrcEltTy.getFixedSizeInBits();
+ if (std::max(DstTypeSize, SrcTypeSize) == 16) {
+ REVOp = AArch64ISD::REV16;
+ } else if (std::max(DstTypeSize, SrcTypeSize) == 32) {
+ REVOp = AArch64ISD::REV32;
+ } else if (std::max(DstTypeSize, SrcTypeSize) == 64) {
+ REVOp = AArch64ISD::REV64;
+ }
+ if (REVOp) {
+ Shuffle = DAG.getNode(REVOp, dl, ShuffleVT, Shuffle);
+ }
+ }
+ }
+
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index cc7dffc497495a0..ba7dd0a6a2ca0dc 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -141,6 +141,7 @@ define <3 x i32> @fsext_v3i32(ptr %a) {
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
@@ -285,6 +286,7 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
new file mode 100644
index 000000000000000..cfdf4b958fbe95c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=aarch64_be | FileCheck %s
+
+; One `rev16` comes from the `bitcast`; the other is inserted for big-endian targets.
+
+ define <4 x i1> @insert_rev_for_reconstructshuffle(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+; CHECK-LABEL: insert_rev_for_reconstructshuffle:
+; CHECK: zip2 v[[V1:[0-9]+]].8b, v[[V2:[0-9]+]].8b, v[[V3:[0-9]+]].8b
+; CHECK: rev16
+; CHECK: rev16
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %tmp4 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ %tmp5 = icmp eq <4 x i8> %tmp3, %tmp4
+ %tmp6 = freeze <4 x i1> %tmp5
+ ret <4 x i1> %tmp6
+}
\ No newline at end of file
>From 7294dbc2c09634158165d2de557ee1f40402d533 Mon Sep 17 00:00:00 2001
From: hstk30-hw <hanwei62 at huawei.com>
Date: Wed, 11 Oct 2023 15:23:39 +0800
Subject: [PATCH 2/2] test: pass the test case
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
llvm/test/CodeGen/AArch64/aarch64-load-ext.ll | 1 +
llvm/test/CodeGen/AArch64/neon-bitcast.ll | 1 +
llvm/test/CodeGen/AArch64/zext-to-tbl.ll | 4 ++++
4 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e3dd54a0b59a0da..131485f9caff3df 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10933,7 +10933,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
}
}
}
-
+
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index ba7dd0a6a2ca0dc..8ee891e6be9fb35 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -449,6 +449,7 @@ define <4 x i8> @bitcast(i32 %0) {
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: ret
%2 = bitcast i32 %0 to <4 x i8>
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index bfd59f3d813c85e..f03889eb60b7dd5 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -545,6 +545,7 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) {
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: ret
%ret = bitcast i32 %word to <4 x i8>
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index f24abb568400099..1d9655cae159df2 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -1349,6 +1349,10 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
; CHECK-BE-NEXT: rev16 v4.8b, v4.8b
; CHECK-BE-NEXT: rev16 v2.8b, v2.8b
+; CHECK-BE-NEXT: rev16 v3.8b, v3.8b
+; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
+; CHECK-BE-NEXT: rev16 v4.8b, v4.8b
+; CHECK-BE-NEXT: rev16 v2.8b, v2.8b
; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b
More information about the llvm-commits
mailing list