[llvm] 79b21fc - [AArch64][GlobalISel] Fix bug in fewVectorElts action while legalizing oversize G_FPTRUNC vectors.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 17 08:56:36 PDT 2020
Author: Amara Emerson
Date: 2020-09-17T08:56:26-07:00
New Revision: 79b21fc187643416dbd21db10abe46a91b4c3f09
URL: https://github.com/llvm/llvm-project/commit/79b21fc187643416dbd21db10abe46a91b4c3f09
DIFF: https://github.com/llvm/llvm-project/commit/79b21fc187643416dbd21db10abe46a91b4c3f09.diff
LOG: [AArch64][GlobalISel] Fix bug in fewVectorElts action while legalizing oversize G_FPTRUNC vectors.
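For <8 x s32> = fptrunc <8 x s64>, the fewerElementsVector action tries to break
down the source vector into the final source vectors of <2 x s64> using unmerge.
The breakdown type was being built with NumParts elements (4 in this case)
instead of the narrow type's element count (2), so the unmerge was created with
the wrong source piece type and crashed. This fixes the crash by using the
narrow type's number of elements for the breakdown type.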
Also add some legalizer tests explicitly for G_FPTRUNC, which we didn't have before.
Differential Revision: https://reviews.llvm.org/D87814
Added:
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/arm64-vcvt.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a8283e47acdd..e8ddfc8e083e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3285,7 +3285,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
return UnableToLegalize;
- NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
+ NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
} else {
NumParts = DstTy.getNumElements();
NarrowTy1 = SrcTy.getElementType();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 6b98e7a58328..f162f148f09d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -384,7 +384,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// FP conversions
getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
- {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
+ {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
+ .clampMaxNumElements(0, s32, 2);
getActionDefinitionsBuilder(G_FPEXT).legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
new file mode 100644
index 000000000000..381bd03cf19c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
@@ -0,0 +1,139 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=legalizer -O0 -global-isel %s -o - | FileCheck %s
+---
+name: fptrunc_s16_s32
+body: |
+ bb.0:
+ liveins: $s0
+
+ ; CHECK-LABEL: name: fptrunc_s16_s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s32)
+ ; CHECK: $h0 = COPY [[FPTRUNC]](s16)
+ ; CHECK: RET_ReallyLR implicit $h0
+ %0:_(s32) = COPY $s0
+ %1:_(s16) = G_FPTRUNC %0
+ $h0 = COPY %1(s16)
+ RET_ReallyLR implicit $h0
+...
+---
+name: fptrunc_s16_s64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fptrunc_s16_s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s64)
+ ; CHECK: $h0 = COPY [[FPTRUNC]](s16)
+ ; CHECK: RET_ReallyLR implicit $h0
+ %0:_(s64) = COPY $d0
+ %1:_(s16) = G_FPTRUNC %0
+ $h0 = COPY %1(s16)
+ RET_ReallyLR implicit $h0
+...
+---
+name: fptrunc_s32_s64
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fptrunc_s32_s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[COPY]](s64)
+ ; CHECK: $s0 = COPY [[FPTRUNC]](s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:_(s64) = COPY $d0
+ %1:_(s32) = G_FPTRUNC %0
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+...
+---
+name: fptrunc_v4s16_v4s32
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: fptrunc_v4s16_v4s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[COPY]](<4 x s32>)
+ ; CHECK: $d0 = COPY [[FPTRUNC]](<4 x s16>)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s16>) = G_FPTRUNC %0
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+...
+---
+name: fptrunc_v2s16_v2s32
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fptrunc_v2s16_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s16>) = G_FPTRUNC [[COPY]](<2 x s32>)
+ ; CHECK: $s0 = COPY [[FPTRUNC]](<2 x s16>)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s16>) = G_FPTRUNC %0
+ $s0 = COPY %1(<2 x s16>)
+ RET_ReallyLR implicit $s0
+...
+---
+name: fptrunc_v4s32_v4s64
+body: |
+ bb.0:
+
+ ; CHECK-LABEL: name: fptrunc_v4s32_v4s64
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[DEF]](<2 x s64>)
+ ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[DEF]](<2 x s64>)
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC]](<2 x s32>), [[FPTRUNC1]](<2 x s32>)
+ ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:_(<4 x s64>) = G_IMPLICIT_DEF
+ %1:_(<4 x s32>) = G_FPTRUNC %0
+ $q0 = COPY %1(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: fptrunc_v8s32_v8s64
+body: |
+ bb.0:
+
+ liveins: $x0, $q0, $q1, $q2, $q3, $x0
+
+ ; CHECK-LABEL: name: fptrunc_v8s32_v8s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY]](<2 x s64>)
+ ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY1]](<2 x s64>)
+ ; CHECK: [[FPTRUNC2:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY2]](<2 x s64>)
+ ; CHECK: [[FPTRUNC3:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY3]](<2 x s64>)
+ ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: G_STORE [[FPTRUNC]](<2 x s32>), [[COPY5]](p0) :: (store 8, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64)
+ ; CHECK: G_STORE [[FPTRUNC1]](<2 x s32>), [[PTR_ADD]](p0) :: (store 8 + 8)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C1]](s64)
+ ; CHECK: G_STORE [[FPTRUNC2]](<2 x s32>), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+ ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C2]](s64)
+ ; CHECK: G_STORE [[FPTRUNC3]](<2 x s32>), [[PTR_ADD2]](p0) :: (store 8 + 24)
+ ; CHECK: RET_ReallyLR
+ %2:_(<2 x s64>) = COPY $q0
+ %3:_(<2 x s64>) = COPY $q1
+ %4:_(<2 x s64>) = COPY $q2
+ %5:_(<2 x s64>) = COPY $q3
+ %0:_(<8 x s64>) = G_CONCAT_VECTORS %2(<2 x s64>), %3(<2 x s64>), %4(<2 x s64>), %5(<2 x s64>)
+ %1:_(p0) = COPY $x0
+ %6:_(<8 x s32>) = G_FPTRUNC %0(<8 x s64>)
+ %7:_(p0) = COPY $x0
+ G_STORE %6(<8 x s32>), %7(p0) :: (store 32)
+ RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 9ab724767707..67eba3f4e307 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -3,6 +3,7 @@
; RUN: -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | \
; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK
+; FALLBACK-NOT: remark{{.*}}fcvtas_2s
define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
;CHECK-LABEL: fcvtas_2s:
;CHECK-NOT: ld1
@@ -12,6 +13,7 @@ define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
ret <2 x i32> %tmp3
}
+; FALLBACK-NOT: remark{{.*}}fcvtas_4s
define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
;CHECK-LABEL: fcvtas_4s:
;CHECK-NOT: ld1
@@ -21,6 +23,7 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
ret <4 x i32> %tmp3
}
+; FALLBACK-NOT: remark{{.*}}fcvtas_2d
define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
;CHECK-LABEL: fcvtas_2d:
;CHECK-NOT: ld1
More information about the llvm-commits mailing list