[llvm] [ARM][Codegen] Fix vector data miscompilation in arm32be (PR #105519)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 02:52:25 PDT 2024
https://github.com/Zhenhang1213 updated https://github.com/llvm/llvm-project/pull/105519
>From a846b8db14d6a7f5f1db46592f19f226d6a1bf78 Mon Sep 17 00:00:00 2001
From: Austin <zhenhangwang at huawei.com>
Date: Thu, 22 Aug 2024 17:52:06 +0800
Subject: [PATCH] [Clang][Codegen] Fix vector data miscompilation in arm32be
only change bitcast to VECTOR_REG_CAST
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 ++--
llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll | 3 +--
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll | 7 +++++--
llvm/test/CodeGen/Thumb2/mve-masked-load.ll | 7 ++++---
llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll | 2 --
llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll | 8 ++------
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 10 ++++------
7 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1e8bb8a495e68b..2de47f93d956f7 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7966,7 +7966,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
@@ -7976,7 +7976,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 4026495a0f2b41..a4f5d1c61eae73 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -101,9 +101,8 @@ define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
-; CHECK-NEXT: vrev64.16 d17, d17
-; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.i16 d16, d18, d16
+; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
index fd33dddc685e5e..45cc79a91d80d2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll
@@ -65,7 +65,8 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_u16(<8 x i16> %a) {
; BE-LABEL: test_vmovlbq_u16:
; BE: @ %bb.0: @ %entry
; BE-NEXT: vrev64.16 q1, q0
-; BE-NEXT: vmovlb.u16 q1, q1
+; BE-NEXT: vmov.i32 q0, #0xffff
+; BE-NEXT: vand q1, q1, q0
; BE-NEXT: vrev64.32 q0, q1
; BE-NEXT: bx lr
entry:
@@ -137,7 +138,9 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_u16(<8 x i16> %a) {
; BE-LABEL: test_vmovltq_u16:
; BE: @ %bb.0: @ %entry
; BE-NEXT: vrev64.16 q1, q0
-; BE-NEXT: vmovlt.u16 q1, q1
+; BE-NEXT: vrev32.16 q0, q1
+; BE-NEXT: vmov.i32 q1, #0xffff
+; BE-NEXT: vand q1, q0, q1
; BE-NEXT: vrev64.32 q0, q1
; BE-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
index b0a3a6354daa70..0f5f5acb805892 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll
@@ -293,9 +293,10 @@ define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_other(ptr %dest, <4
;
; CHECK-BE-LABEL: zext16_masked_v4i32_align2_other:
; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: vmovlb.u16 q0, q1
-; CHECK-BE-NEXT: vmovlb.s16 q1, q1
+; CHECK-BE-NEXT: vmov.i32 q1, #0xffff
+; CHECK-BE-NEXT: vrev64.32 q2, q0
+; CHECK-BE-NEXT: vand q0, q2, q1
+; CHECK-BE-NEXT: vmovlb.s16 q1, q2
; CHECK-BE-NEXT: vpt.s32 gt, q1, zr
; CHECK-BE-NEXT: vldrht.u32 q1, [r0]
; CHECK-BE-NEXT: vpsel q1, q1, q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
index 470007878ec842..0d0e45956080de 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll
@@ -115,7 +115,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: add sp, #4
@@ -145,7 +144,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: rbit r0, r0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
index a92adf6f1a067b..ba3d5c22fc671b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll
@@ -105,7 +105,6 @@ define arm_aapcs_vfpcc <8 x i16> @load_v8i1(ptr %src, <8 x i16> %a) {
; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -130,7 +129,6 @@ define arm_aapcs_vfpcc <16 x i8> @load_v16i1(ptr %src, <16 x i8> %a) {
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
; CHECK-BE-NEXT: rbit r0, r0
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: lsrs r0, r0, #16
; CHECK-BE-NEXT: vmsr p0, r0
; CHECK-BE-NEXT: vpsel q1, q1, q0
@@ -416,10 +414,9 @@ define arm_aapcs_vfpcc <8 x i16> @load_predcast8(ptr %i, <8 x i16> %a) {
;
; CHECK-BE-LABEL: load_predcast8:
; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
-; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
@@ -439,10 +436,9 @@ define arm_aapcs_vfpcc <16 x i8> @load_predcast16(ptr %i, <16 x i8> %a) {
;
; CHECK-BE-LABEL: load_predcast16:
; CHECK-BE: @ %bb.0:
+; CHECK-BE-NEXT: vldr p0, [r0]
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.i32 q0, #0x0
-; CHECK-BE-NEXT: vldr p0, [r0]
-; CHECK-BE-NEXT: vrev32.8 q0, q0
; CHECK-BE-NEXT: vpsel q1, q1, q0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 97abc539557131..5aa3cde9c686be 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -414,9 +414,8 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
-; CHECKBE-NEXT: vrev64.8 q2, q1
-; CHECKBE-NEXT: vrev64.8 q1, q0
-; CHECKBE-NEXT: vorr q1, q1, q2
+; CHECKBE-NEXT: vrev64.8 q2, q0
+; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
@@ -434,9 +433,8 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
-; CHECKBE-NEXT: vrev64.16 q2, q1
-; CHECKBE-NEXT: vrev64.16 q1, q0
-; CHECKBE-NEXT: vorr q1, q1, q2
+; CHECKBE-NEXT: vrev64.16 q2, q0
+; CHECKBE-NEXT: vorr q1, q2, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
More information about the llvm-commits
mailing list