[llvm] 6f37d42 - [AArch64][GlobalISel] Reland Make G_DUP immediate 32-bits or larger (#96780) (#99014)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 03:06:49 PDT 2024
Author: chuongg3
Date: 2024-07-25T11:06:46+01:00
New Revision: 6f37d42a33ba09add14a9a1f422cb489ba02336e
URL: https://github.com/llvm/llvm-project/commit/6f37d42a33ba09add14a9a1f422cb489ba02336e
DIFF: https://github.com/llvm/llvm-project/commit/6f37d42a33ba09add14a9a1f422cb489ba02336e.diff
LOG: [AArch64][GlobalISel] Reland Make G_DUP immediate 32-bits or larger (#96780) (#99014)
The immediate operand of G_DUP is extended in RegBankSelect to at least 32 bits
to allow for better pattern matching in TableGen.
The previous patch erased the constant without checking whether it had
more than one use.
Changes in this reland:
- The constant is no longer erased
- Added the @v_dup16_const test
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
llvm/test/CodeGen/AArch64/arm64-dup.ll
llvm/test/CodeGen/AArch64/neon-mov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index d33b0ab7b9fcd..e9e6b6cb68d0d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2284,8 +2284,9 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
Register Dst = I.getOperand(0).getReg();
auto *CV = ConstantDataVector::getSplat(
MRI.getType(Dst).getNumElements(),
- ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
- ValAndVReg->Value));
+ ConstantInt::get(
+ Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
+ ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
if (!emitConstantVector(Dst, CV, MIB, MRI))
return false;
I.eraseFromParent();
@@ -5614,7 +5615,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}
if (CV->getSplatValue()) {
- APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
+ APInt DefBits = APInt::getSplat(
+ DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 5616d063f70bc..220ddbf10a1c2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -42,6 +42,7 @@
#include "AArch64GenRegisterBankInfo.def"
using namespace llvm;
+static const unsigned CustomMappingID = 1;
AArch64RegisterBankInfo::AArch64RegisterBankInfo(
const TargetRegisterInfo &TRI) {
@@ -424,6 +425,26 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(2).setReg(Ext.getReg(0));
return applyDefaultMapping(OpdMapper);
}
+ case AArch64::G_DUP: {
+ // Extend smaller gpr to 32-bits
+ assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
+ "Expected sources smaller than 32-bits");
+ Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+
+ Register ConstReg;
+ auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
+ ConstReg =
+ Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
+ } else {
+ ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
+ .getReg(0);
+ }
+ MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
+ MI.getOperand(1).setReg(ConstReg);
+ return applyDefaultMapping(OpdMapper);
+ }
default:
llvm_unreachable("Don't know how to handle that operation");
}
@@ -792,8 +813,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
- else
+ else {
+ if (ScalarTy.getSizeInBits() < 32 &&
+ getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) {
+ // Calls applyMappingImpl()
+ MappingID = CustomMappingID;
+ }
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+ }
break;
}
case TargetOpcode::G_TRUNC: {
@@ -1014,8 +1041,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// If the type is i8/i16, and the regank will be GPR, then we change the
// type to i32 in applyMappingImpl.
LLT Ty = MRI.getType(MI.getOperand(2).getReg());
- if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
- MappingID = 1;
+ if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) {
+ // Calls applyMappingImpl()
+ MappingID = CustomMappingID;
+ }
OpRegBankIdx[2] = PMI_FirstGPR;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
index 4cd6eef531ce0..66c8c2efda9bc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
@@ -16,10 +16,11 @@ body: |
; CHECK-LABEL: name: v4s32_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
@@ -37,10 +38,11 @@ body: |
; CHECK-LABEL: name: v4s64_gpr
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
@@ -58,10 +60,11 @@ body: |
; CHECK-LABEL: name: v2s32_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $w0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
@@ -79,10 +82,11 @@ body: |
; CHECK-LABEL: name: v4s32_fpr
; CHECK: liveins: $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $s0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
@@ -100,10 +104,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
@@ -121,10 +126,11 @@ body: |
; CHECK-LABEL: name: v2s32_fpr
; CHECK: liveins: $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $s0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
@@ -142,10 +148,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr_copy
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%6:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %6(<2 x s64>)
@@ -163,11 +170,13 @@ body: |
; CHECK-LABEL: name: v416s8_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
- ; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%trunc:_(s8) = G_TRUNC %0(s32)
%1:_(<16 x s8>) = G_DUP %trunc(s8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index 809bdceb4aa25..cf2bab78fe5a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -453,3 +453,22 @@ body: |
%dup:fpr(<2 x p0>) = G_DUP %cst(p0)
$q0 = COPY %dup(<2 x p0>)
RET_ReallyLR implicit $q0
+...
+---
+name: cstv4i16gpri32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins:
+ ; CHECK-LABEL: name: cstv4i16gpri32
+ ; CHECK: %dup:fpr64 = MOVIv4i16 3, 0
+ ; CHECK-NEXT: $d0 = COPY %dup
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %cst:gpr(s32) = G_CONSTANT i32 3
+ %dup:fpr(<4 x s16>) = G_DUP %cst(s32)
+ $d0 = COPY %dup(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 2bf5419e54830..0291f8c912304 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -103,6 +103,19 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
ret <4 x i32> %tmp4
}
+define <4 x i16> @v_dup16_const(i16 %y, ptr %p) {
+; CHECK-LABEL: v_dup16_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi.4h v0, #10
+; CHECK-NEXT: mov w8, #10 // =0xa
+; CHECK-NEXT: strh w8, [x1]
+; CHECK-NEXT: ret
+ %i = insertelement <4 x i16> undef, i16 10, i32 0
+ %lo = shufflevector <4 x i16> %i, <4 x i16> undef, <4 x i32> zeroinitializer
+ store i16 10, ptr %p
+ ret <4 x i16> %lo
+}
+
define <4 x float> @v_dupQfloat(float %A) nounwind {
; CHECK-LABEL: v_dupQfloat:
; CHECK: // %bb.0:
@@ -420,9 +433,9 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
@@ -443,9 +456,9 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: adrp x8, .LCPI35_0
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
@@ -462,9 +475,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
@@ -481,9 +494,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
;
; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI36_0
+; CHECK-GI-NEXT: adrp x8, .LCPI37_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
@@ -503,12 +516,12 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: disguised_dup:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI37_1
+; CHECK-GI-NEXT: adrp x8, .LCPI38_1
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1]
-; CHECK-GI-NEXT: adrp x8, .LCPI37_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI38_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0]
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2
; CHECK-GI-NEXT: str q0, [x0]
; CHECK-GI-NEXT: str q2, [x1]
@@ -531,8 +544,8 @@ define <2 x i32> @dup_const2(<2 x i32> %A) nounwind {
;
; CHECK-GI-LABEL: dup_const2:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI38_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT: adrp x8, .LCPI39_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: ret
%tmp2 = add <2 x i32> %A, <i32 8421378, i32 8421378>
@@ -550,8 +563,8 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
;
; CHECK-GI-LABEL: dup_const4_ext:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI39_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT: adrp x8, .LCPI40_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
; CHECK-GI-NEXT: add.4s v0, v0, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
@@ -575,12 +588,12 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
;
; CHECK-GI-LABEL: dup_const24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI40_1
+; CHECK-GI-NEXT: adrp x8, .LCPI41_1
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1]
-; CHECK-GI-NEXT: adrp x8, .LCPI40_0
+; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI41_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI41_0
; CHECK-GI-NEXT: add.2s v0, v0, v3
-; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0]
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI41_0]
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: add.4s v1, v2, v3
; CHECK-GI-NEXT: eor.16b v0, v1, v0
@@ -687,3 +700,17 @@ define <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
ret <8 x i16> %r
}
+define <4 x i16> @dup_i16_v4i16_constant() {
+; CHECK-SD-LABEL: dup_i16_v4i16_constant:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov w8, #9211 // =0x23fb
+; CHECK-SD-NEXT: dup.4h v0, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: dup_i16_v4i16_constant:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI50_0
+; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI50_0]
+; CHECK-GI-NEXT: ret
+ ret <4 x i16> <i16 9211, i16 9211, i16 9211, i16 9211>
+}
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 170ba7292ae60..ff80ff097b28f 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() {
}
define <4 x i32> @movi4s_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi4s_fneg:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: movi4s_fneg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: movi4s_fneg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: movi4s_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #240, lsl #8
+; CHECK-NEXT: fneg v0.4s, v0.4s
+; CHECK-NEXT: ret
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
}
@@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() {
; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8
; CHECK-NOFP16-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: mvni8h_neg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.8h, #240
-; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: mvni8h_neg:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: movi v0.8h, #240
+; CHECK-FP16-NEXT: fneg v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
; CHECK-NOFP16-GI: // %bb.0:
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: mvni8h_neg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.8h, #240
-; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
-; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
@@ -494,29 +470,11 @@ define <2 x double> @fmov2d() {
}
define <2 x double> @fmov2d_neg0() {
-; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: fmov2d_neg0:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: fmov2d_neg0:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
-; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: fmov2d_neg0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: fneg v0.2d, v0.2d
+; CHECK-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}
@@ -581,5 +539,4 @@ define <2 x i32> @movi1d() {
ret <2 x i32> %1
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
; CHECK-NOFP16: {{.*}}
More information about the llvm-commits
mailing list