[llvm] [ARM][Codegen] Fix vector data miscompilation in arm32be (PR #105519)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 25 19:18:03 PDT 2024
https://github.com/Zhenhang1213 updated https://github.com/llvm/llvm-project/pull/105519
>From 41e737a1e436c174b2fb5cc7271922d2d1fdad68 Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Mon, 26 Aug 2024 10:11:04 +0800
Subject: [PATCH 1/3] [ARM][Codegen] Fix vector data miscompilation in arm32be
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
.../ARM/big-endian-neon-fp16-bitconv.ll | 3 +--
llvm/test/CodeGen/Thumb2/mve-be.ll | 2 --
.../CodeGen/Thumb2/mve-intrinsics/vmovl.ll | 7 +++++--
llvm/test/CodeGen/Thumb2/mve-masked-load.ll | 7 ++++---
llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll | 2 --
llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll | 8 ++------
llvm/test/CodeGen/Thumb2/mve-pred-spill.ll | 18 ++++++++----------
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 10 ++++------
9 files changed, 25 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1e8bb8a495e68b..f2b8c10acf8bc0 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7966,7 +7966,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 4026495a0f2b41..a4f5d1c61eae73 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -101,9 +101,8 @@ define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
-; CHECK-NEXT: vrev64.16 d17, d17
-; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.i16 d16, d18, d16
+; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-be.ll b/llvm/test/CodeGen/Thumb2/mve-be.ll
index 522d6f8704b6af..c7066ee34dc902 100644
--- a/llvm/test/CodeGen/Thumb2/mve-be.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-be.ll
@@ -232,7 +232,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_le(<4 x i32> %src) {
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x1
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vand q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -254,7 +253,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_be(<4 x i32> %src) {
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x1000000
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vand q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
index fd33dddc685e5e..45cc79a91d80d2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
@@ -65,7 +65,8 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_u16(<8 x i16> %a) {
; BE-LABEL: test_vmovlbq_u16:
; BE: @ %bb.0: @ %entry
; BE-NEXT: vrev64.16 q1, q0
-; BE-NEXT: vmovlb.u16 q1, q1
+; BE-NEXT: vmov.i32 q0, #0xffff
+; BE-NEXT: vand q1, q1, q0
; BE-NEXT: vrev64.32 q0, q1
; BE-NEXT: bx lr
entry:
@@ -137,7 +138,9 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_u16(<8 x i16> %a) {
; BE-LABEL: test_vmovltq_u16:
; BE: @ %bb.0: @ %entry
; BE-NEXT: vrev64.16 q1, q0
-; BE-NEXT: vmovlt.u16 q1, q1
+; BE-NEXT: vrev32.16 q0, q1
+; BE-NEXT: vmov.i32 q1, #0xffff
+; BE-NEXT: vand q1, q0, q1
; BE-NEXT: vrev64.32 q0, q1
; BE-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
index b0a3a6354daa70..0f5f5acb805892 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -293,9 +293,10 @@ define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_other(ptr %dest, <4
;
; CHECK-BE-LABEL: zext16_masked_v4i32_align2_other:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: vmovlb.u16 q0, q1
-; CHECK-BE-NEXT: vmovlb.s16 q1, q1
+; CHECK-BE-NEXT: vmov.i32 q1, #0xffff
+; CHECK-BE-NEXT: vrev64.32 q2, q0
+; CHECK-BE-NEXT: vand q0, q2, q1
+; CHECK-BE-NEXT: vmovlb.s16 q1, q2
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
; CHECK-BE-NEXT: vldrht.u32 q1, [r0]
; CHECK-BE-NEXT: vpsel q1, q1, q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
index 470007878ec842..0d0e45956080de 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -115,7 +115,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: add sp, #4
@@ -145,7 +144,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: rbit r0, r0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
index a92adf6f1a067b..ba3d5c22fc671b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
@@ -105,7 +105,6 @@ define arm_aapcs_vfpcc <8 x i16> @load_v8i1(ptr %src, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -130,7 +129,6 @@ define arm_aapcs_vfpcc <16 x i8> @load_v16i1(ptr %src, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
; CHECK-BE-NEXT: rbit r0, r0
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
@@ -416,10 +414,9 @@ define arm_aapcs_vfpcc <8 x i16> @load_predcast8(ptr %i, <8 x i16> %a) {
;
; CHECK-BE-LABEL: load_predcast8:
; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -439,10 +436,9 @@ define arm_aapcs_vfpcc <16 x i8> @load_predcast16(ptr %i, <16 x i8> %a) {
;
; CHECK-BE-LABEL: load_predcast16:
; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
index 3bc129d0fd92e5..c17066126083a9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
@@ -156,11 +156,10 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.16 q4, q1
-; CHECK-BE-NEXT: vmov.i32 q1, #0x0
-; CHECK-BE-NEXT: vrev64.16 q2, q0
-; CHECK-BE-NEXT: vrev32.16 q1, q1
-; CHECK-BE-NEXT: vcmp.i16 eq, q2, zr
-; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.16 q1, q0
+; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vpsel q1, q4, q0
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bl ext_i16
@@ -209,11 +208,10 @@ define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: vrev64.8 q4, q1
-; CHECK-BE-NEXT: vmov.i32 q1, #0x0
-; CHECK-BE-NEXT: vrev64.8 q2, q0
-; CHECK-BE-NEXT: vrev32.8 q1, q1
-; CHECK-BE-NEXT: vcmp.i8 eq, q2, zr
-; CHECK-BE-NEXT: vpsel q1, q4, q1
+; CHECK-BE-NEXT: vrev64.8 q1, q0
+; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
+; CHECK-BE-NEXT: vmov.i32 q0, #0x0
+; CHECK-BE-NEXT: vpsel q1, q4, q0
; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bl ext_i8
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 97abc539557131..5aa3cde9c686be 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -414,9 +414,8 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
-; CHECKBE-NEXT: vrev64.8 q2, q1
-; CHECKBE-NEXT: vrev64.8 q1, q0
-; CHECKBE-NEXT: vorr q1, q1, q2
+; CHECKBE-NEXT: vrev64.8 q2, q0
+; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -434,9 +433,8 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
-; CHECKBE-NEXT: vrev64.16 q2, q1
-; CHECKBE-NEXT: vrev64.16 q1, q0
-; CHECKBE-NEXT: vorr q1, q1, q2
+; CHECKBE-NEXT: vrev64.16 q2, q0
+; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
>From 8c2ecc2d6cafacee609fae426c8cd9baf662eda6 Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Mon, 26 Aug 2024 10:13:30 +0800
Subject: [PATCH 2/3] [ARM] Add VECTOR_REG_CAST identity fold.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 ++++
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll | 4 ++--
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f2b8c10acf8bc0..5bb7ddffc410ab 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -15444,6 +15444,10 @@ static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
if (ST->isLittle())
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ // VT VECTOR_REG_CAST (VT Op) -> Op
+ if (Op.getValueType() == VT)
+ return Op;
+
// VECTOR_REG_CAST undef -> undef
if (Op.isUndef())
return DAG.getUNDEF(VT);
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
index 45cc79a91d80d2..0bc7b4b30086db 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
@@ -65,8 +65,8 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_u16(<8 x i16> %a) {
; BE-LABEL: test_vmovlbq_u16:
; BE: @ %bb.0: @ %entry
; BE-NEXT: vrev64.16 q1, q0
-; BE-NEXT: vmov.i32 q0, #0xffff
-; BE-NEXT: vand q1, q1, q0
+; BE-NEXT: vmov.i32 q0, #0xffff
+; BE-NEXT: vand q1, q1, q0
; BE-NEXT: vrev64.32 q0, q1
; BE-NEXT: bx lr
entry:
>From f02a0af7cf0c853a41f6e3425230fa98a8f3dacb Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Mon, 26 Aug 2024 10:17:48 +0800
Subject: [PATCH 3/3] [Clang][Codegen] fix vector data by modifying VMVN
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5bb7ddffc410ab..b4ea4330ecc86d 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7976,7 +7976,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
More information about the llvm-commits
mailing list