[llvm] 4ad9ca0 - [ARM] Fix incorrect handling of big-endian vmov.i64
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 3 09:37:31 PDT 2020
Author: John Brawn
Date: 2020-04-03T17:36:50+01:00
New Revision: 4ad9ca0f9e1b501ddf0ca4082c459d98046c93c2
URL: https://github.com/llvm/llvm-project/commit/4ad9ca0f9e1b501ddf0ca4082c459d98046c93c2
DIFF: https://github.com/llvm/llvm-project/commit/4ad9ca0f9e1b501ddf0ca4082c459d98046c93c2.diff
LOG: [ARM] Fix incorrect handling of big-endian vmov.i64
Currently, when the target is big-endian, vmov.i64 reverses the order of the two
32-bit words of the vector. This is correct only when the underlying element
type is 32-bit; what it should actually do is treat the value as a vector of the
underlying element type and reverse the order of those elements.
Differential Revision: https://reviews.llvm.org/D76515
Added:
llvm/test/CodeGen/ARM/big-endian-vmov.ll
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
llvm/test/CodeGen/ARM/vmov.ll
llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bfe475723cae..e4d1caa0b1e0 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6443,9 +6443,10 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
/// immediate" operand (e.g., VMOV). If so, return the encoded value.
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- const SDLoc &dl, EVT &VT, bool is128Bits,
+ const SDLoc &dl, EVT &VT, EVT VectorVT,
VMOVModImmType type) {
unsigned OpCmode, Imm;
+ bool is128Bits = VectorVT.is128BitVector();
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -6563,9 +6564,18 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
- if (DAG.getDataLayout().isBigEndian())
- // swap higher and lower 32 bit word
- Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+ if (DAG.getDataLayout().isBigEndian()) {
+ // Reverse the order of elements within the vector.
+ unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
+ unsigned Mask = (1 << BytesPerElem) - 1;
+ unsigned NumElems = 8 / BytesPerElem;
+ unsigned NewImm = 0;
+ for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
+ unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
+ NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
+ }
+ Imm = NewImm;
+ }
// Op=1, Cmode=1110.
OpCmode = 0x1e;
@@ -6658,7 +6668,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
- VMovVT, false, VMOVModImm);
+ VMovVT, VT, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@@ -6675,7 +6685,7 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Finally, try a VMVN.i32
NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
- false, VMVNModImm);
+ VT, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
@@ -7185,10 +7195,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
(ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
// Check if an immediate VMOV works.
EVT VmovVT;
- SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VmovVT, VT.is128BitVector(),
- VMOVModImm);
+ SDValue Val =
+ isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@@ -7198,9 +7207,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
Val = isVMOVModifiedImm(
- NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VmovVT, VT.is128BitVector(),
- ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
+ NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
+ VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -12403,8 +12411,7 @@ static SDValue PerformANDCombine(SDNode *N,
EVT VbicVT;
SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VbicVT, VT.is128BitVector(),
- OtherModImm);
+ DAG, dl, VbicVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
@@ -12708,10 +12715,9 @@ static SDValue PerformORCombine(SDNode *N,
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VorrVT;
- SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VorrVT, VT.is128BitVector(),
- OtherModImm);
+ SDValue Val =
+ isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 86a69be5b4df..9942d6df99a4 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -98,7 +98,7 @@ entry:
define void @conv_v4i16_to_v4f16( <4 x i16> %a, <4 x half>* %store ) {
; CHECK-LABEL: conv_v4i16_to_v4f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vmov.i64 d16, #0xffffffff0000
+; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
; CHECK-NEXT: vrev64.16 d17, d17
diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
new file mode 100644
index 000000000000..1b2d4db9b909
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -mtriple armv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc < %s -mtriple armebv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; CHECK-LABEL: vmov_i8
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
+ ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
+}
+
+; CHECK-LABEL: vmov_i16_a:
+; CHECK-LE: vmov.i64 d0, #0xffff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
+ ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
+}
+
+; CHECK-LABEL: vmov_i16_b:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
+ ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
+}
+
+; CHECK-LABEL: vmov_i16_c:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
+ ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
+}
+
+; CHECK-LABEL: vmov_i32_a:
+; CHECK-LE: vmov.i64 d0, #0xffffffff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffffffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
+ ret <2 x i32> <i32 0, i32 -1>
+}
+
+; CHECK-LABEL: vmov_i32_b:
+; CHECK-LE: vmov.i64 d0, #0xff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
+ ret <2 x i32> <i32 0, i32 255>
+}
+
+; CHECK-LABEL: vmov_i32_c:
+; CHECK-LE: vmov.i64 d0, #0xff0000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
+ ret <2 x i32> <i32 0, i32 65280>
+}
+
+; CHECK-LABEL: vmov_i32_d:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff0000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
+ ret <2 x i32> <i32 0, i32 16711680>
+}
+
+; CHECK-LABEL: vmov_i32_e:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff000000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
+ ret <2 x i32> <i32 0, i32 4278190080>
+}
+
+; CHECK-LABEL: vmov_i64_a:
+; CHECK: vmov.i8 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
+ ret <1 x i64> <i64 -1>
+}
+
+; CHECK-LABEL: vmov_i64_b:
+; CHECK: vmov.i64 d0, #0xffff00ff0000ff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
+ ret <1 x i64> <i64 72056498804490495>
+}
diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll
index 751fd2ff557a..995e015b4b81 100644
--- a/llvm/test/CodeGen/ARM/vmov.ll
+++ b/llvm/test/CodeGen/ARM/vmov.ll
@@ -219,15 +219,10 @@ define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind {
}
define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
-; CHECK-LE-LABEL: v_movQi64:
-; CHECK-LE: @ %bb.0:
-; CHECK-LE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
-; CHECK-LE-NEXT: mov pc, lr
-;
-; CHECK-BE-LABEL: v_movQi64:
-; CHECK-BE: @ %bb.0:
-; CHECK-BE-NEXT: vmov.i64 q0, #0xffffff0000ff
-; CHECK-BE-NEXT: mov pc, lr
+; CHECK-LABEL: v_movQi64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
+; CHECK-NEXT: mov pc, lr
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 77dd9c5df95b..aad885b9b18a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -263,15 +263,10 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() {
-; CHECKLE-LABEL: mov_int64_ff:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: vmov.i64 q0, #0xff
-; CHECKLE-NEXT: bx lr
-;
-; CHECKBE-LABEL: mov_int64_ff:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xff00000000
-; CHECKBE-NEXT: bx lr
+; CHECK-LABEL: mov_int64_ff:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i64 q0, #0xff
+; CHECK-NEXT: bx lr
entry:
ret <2 x i64> < i64 255, i64 255 >
}
@@ -286,15 +281,10 @@ entry:
}
define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() {
-; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff:
-; CHECKLE: @ %bb.0: @ %entry
-; CHECKLE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
-; CHECKLE-NEXT: bx lr
-;
-; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff:
-; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xffffff0000ff
-; CHECKBE-NEXT: bx lr
+; CHECK-LABEL: mov_int64_ff0000ff0000ffff:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i64 q0, #0xff0000ff0000ffff
+; CHECK-NEXT: bx lr
entry:
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
@@ -338,7 +328,7 @@ define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() {
;
; CHECKBE-LABEL: mov_int64_0f000f0f:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff00ff00
+; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff000000ff00
; CHECKBE-NEXT: bx lr
entry:
ret <16 x i8> <i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0>
@@ -352,7 +342,7 @@ define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() {
;
; CHECKBE-LABEL: mov_int64_ff00ffff:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q0, #0xffffffffffff0000
+; CHECKBE-NEXT: vmov.i64 q0, #0xffff0000ffffffff
; CHECKBE-NEXT: bx lr
entry:
ret <8 x i16> <i16 -1, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1>
@@ -494,7 +484,7 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) {
;
; CHECKBE-LABEL: test:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff00ff0000
+; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00
; CHECKBE-NEXT: vrev64.8 q2, q1
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vorr q1, q1, q2
@@ -514,7 +504,7 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) {
;
; CHECKBE-LABEL: test2:
; CHECKBE: @ %bb.0: @ %entry
-; CHECKBE-NEXT: vmov.i64 q1, #0xffffffffffff
+; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vorr q1, q1, q2
More information about the llvm-commits mailing list