[llvm] [ARM] Fix VBICimm and VORRimm generation under Big endian. (PR #107813)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 8 23:41:56 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/107813
This is a smaller follow-on to #105519 that fixes VBICimm and VORRimm too. The logic behind lowering vector immediates under big-endian Neon/MVE is to treat them in natural lane ordering (the same as little-endian), and to VECTOR_REG_CAST them to the correct type (as opposed to creating the constants in big-endian form and bitcasting them). This patch makes sure the same approach is used when creating VORRIMM and VBICIMM.
From 2f5604b7b3afc3a5693e297168a2d93c9cc529d9 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 9 Sep 2024 07:40:28 +0100
Subject: [PATCH] [ARM] Fix VBICimm and VORRimm generation under Big endian.
This is a smaller follow on to #105519 that fixes VBICimm and VORRimm too. The
logic behind lowering vector immediates under big endian Neon/MVE is to treat
them in natural lane ordering (same as little endian), and VECTOR_REG_CAST them
to the correct type (as opposed to creating the constants in big endian form
and bitcasting them). This makes sure that is done when creating VORRIMM and
VBICIMM.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 8 ++++----
llvm/test/CodeGen/ARM/big-endian-vmov.ll | 10 ++++------
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll | 10 ++++------
3 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8c82161bd15c65..67653a17512d2c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14443,9 +14443,9 @@ static SDValue PerformANDCombine(SDNode *N,
DAG, dl, VbicVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
- DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
}
}
}
@@ -14739,9 +14739,9 @@ static SDValue PerformORCombine(SDNode *N,
SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
- DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
}
}
}
diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
index 327d4334ad83a1..e3647cf6234c82 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -219,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() {
ret <8 x i16> <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
}
-; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
; CHECK-LE-LABEL: and_v8i16_m1:
; CHECK-LE: @ %bb.0:
@@ -228,15 +227,14 @@ define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
;
; CHECK-BE-LABEL: and_v8i16_m1:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vrev64.32 q8, q0
+; CHECK-BE-NEXT: vrev64.16 q8, q0
; CHECK-BE-NEXT: vbic.i32 q8, #0x10000
-; CHECK-BE-NEXT: vrev64.32 q0, q8
+; CHECK-BE-NEXT: vrev64.16 q0, q8
; CHECK-BE-NEXT: bx lr
%b = and <8 x i16> %a, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
ret <8 x i16> %b
}
-; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
; CHECK-LE-LABEL: or_v8i16_1:
; CHECK-LE: @ %bb.0:
@@ -245,9 +243,9 @@ define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
;
; CHECK-BE-LABEL: or_v8i16_1:
; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vrev64.32 q8, q0
+; CHECK-BE-NEXT: vrev64.16 q8, q0
; CHECK-BE-NEXT: vorr.i32 q8, #0x10000
-; CHECK-BE-NEXT: vrev64.32 q0, q8
+; CHECK-BE-NEXT: vrev64.16 q0, q8
; CHECK-BE-NEXT: bx lr
%b = or <8 x i16> %a, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
ret <8 x i16> %b
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 9cf92663e3b052..1970ff35f183fa 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -1331,7 +1331,6 @@ entry:
ret <2 x i64> %s
}
-; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
; CHECKLE-LABEL: and_v8i16_m1:
; CHECKLE: @ %bb.0:
@@ -1340,15 +1339,14 @@ define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) {
;
; CHECKBE-LABEL: and_v8i16_m1:
; CHECKBE: @ %bb.0:
-; CHECKBE-NEXT: vrev64.32 q1, q0
+; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vbic.i32 q1, #0x10000
-; CHECKBE-NEXT: vrev64.32 q0, q1
+; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
%b = and <8 x i16> %a, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
ret <8 x i16> %b
}
-; FIXME: This is incorrect for BE
define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
; CHECKLE-LABEL: or_v8i16_1:
; CHECKLE: @ %bb.0:
@@ -1357,9 +1355,9 @@ define arm_aapcs_vfpcc <8 x i16> @or_v8i16_1(<8 x i16> %a) {
;
; CHECKBE-LABEL: or_v8i16_1:
; CHECKBE: @ %bb.0:
-; CHECKBE-NEXT: vrev64.32 q1, q0
+; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vorr.i32 q1, #0x10000
-; CHECKBE-NEXT: vrev64.32 q0, q1
+; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
%b = or <8 x i16> %a, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
ret <8 x i16> %b
More information about the llvm-commits
mailing list