[llvm] 79b21fc - [AArch64][GlobalISel] Fix bug in fewVectorElts action while legalizing oversize G_FPTRUNC vectors.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 17 08:56:36 PDT 2020


Author: Amara Emerson
Date: 2020-09-17T08:56:26-07:00
New Revision: 79b21fc187643416dbd21db10abe46a91b4c3f09

URL: https://github.com/llvm/llvm-project/commit/79b21fc187643416dbd21db10abe46a91b4c3f09
DIFF: https://github.com/llvm/llvm-project/commit/79b21fc187643416dbd21db10abe46a91b4c3f09.diff

LOG: [AArch64][GlobalISel] Fix bug in fewVectorElts action while legalizing oversize G_FPTRUNC vectors.

For <8 x s32> = fptrunc <8 x s64>, the fewerElementsVector action tries to break
the source vector down into its final <2 x s64> pieces using unmerge.
This fixes a crash caused by using the wrong number of elements for the
breakdown type.

Also add some legalizer tests specifically for G_FPTRUNC, which we previously lacked.

Differential Revision: https://reviews.llvm.org/D87814

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/arm64-vcvt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a8283e47acdd..e8ddfc8e083e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3285,7 +3285,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
       return UnableToLegalize;
 
-    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
+    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
   } else {
     NumParts = DstTy.getNumElements();
     NarrowTy1 = SrcTy.getElementType();

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 6b98e7a58328..f162f148f09d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -384,7 +384,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   // FP conversions
   getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
-      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
+      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
+      .clampMaxNumElements(0, s32, 2);
   getActionDefinitionsBuilder(G_FPEXT).legalFor(
       {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
 

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
new file mode 100644
index 000000000000..381bd03cf19c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
@@ -0,0 +1,139 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=legalizer -O0 -global-isel %s -o - | FileCheck %s
+---
+name:            fptrunc_s16_s32
+body:             |
+  bb.0:
+    liveins: $s0
+
+    ; CHECK-LABEL: name: fptrunc_s16_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s32)
+    ; CHECK: $h0 = COPY [[FPTRUNC]](s16)
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:_(s32) = COPY $s0
+    %1:_(s16) = G_FPTRUNC %0
+    $h0 = COPY %1(s16)
+    RET_ReallyLR implicit $h0
+...
+---
+name:            fptrunc_s16_s64
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fptrunc_s16_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s64)
+    ; CHECK: $h0 = COPY [[FPTRUNC]](s16)
+    ; CHECK: RET_ReallyLR implicit $h0
+    %0:_(s64) = COPY $d0
+    %1:_(s16) = G_FPTRUNC %0
+    $h0 = COPY %1(s16)
+    RET_ReallyLR implicit $h0
+...
+---
+name:            fptrunc_s32_s64
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fptrunc_s32_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[COPY]](s64)
+    ; CHECK: $s0 = COPY [[FPTRUNC]](s32)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:_(s64) = COPY $d0
+    %1:_(s32) = G_FPTRUNC %0
+    $s0 = COPY %1(s32)
+    RET_ReallyLR implicit $s0
+...
+---
+name:            fptrunc_v4s16_v4s32
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: fptrunc_v4s16_v4s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[COPY]](<4 x s32>)
+    ; CHECK: $d0 = COPY [[FPTRUNC]](<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s16>) = G_FPTRUNC %0
+    $d0 = COPY %1(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
+---
+name:            fptrunc_v2s16_v2s32
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: fptrunc_v2s16_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s16>) = G_FPTRUNC [[COPY]](<2 x s32>)
+    ; CHECK: $s0 = COPY [[FPTRUNC]](<2 x s16>)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:_(<2 x s32>) = COPY $d0
+    %1:_(<2 x s16>) = G_FPTRUNC %0
+    $s0 = COPY %1(<2 x s16>)
+    RET_ReallyLR implicit $s0
+...
+---
+name:            fptrunc_v4s32_v4s64
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: fptrunc_v4s32_v4s64
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[DEF]](<2 x s64>)
+    ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[DEF]](<2 x s64>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC]](<2 x s32>), [[FPTRUNC1]](<2 x s32>)
+    ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<4 x s64>) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_FPTRUNC %0
+    $q0 = COPY %1(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            fptrunc_v8s32_v8s64
+body:             |
+  bb.0:
+
+    liveins: $x0, $q0, $q1, $q2, $q3, $x0
+
+    ; CHECK-LABEL: name: fptrunc_v8s32_v8s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+    ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY]](<2 x s64>)
+    ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY1]](<2 x s64>)
+    ; CHECK: [[FPTRUNC2:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY2]](<2 x s64>)
+    ; CHECK: [[FPTRUNC3:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY3]](<2 x s64>)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: G_STORE [[FPTRUNC]](<2 x s32>), [[COPY5]](p0) :: (store 8, align 32)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64)
+    ; CHECK: G_STORE [[FPTRUNC1]](<2 x s32>), [[PTR_ADD]](p0) :: (store 8 + 8)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C1]](s64)
+    ; CHECK: G_STORE [[FPTRUNC2]](<2 x s32>), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C2]](s64)
+    ; CHECK: G_STORE [[FPTRUNC3]](<2 x s32>), [[PTR_ADD2]](p0) :: (store 8 + 24)
+    ; CHECK: RET_ReallyLR
+    %2:_(<2 x s64>) = COPY $q0
+    %3:_(<2 x s64>) = COPY $q1
+    %4:_(<2 x s64>) = COPY $q2
+    %5:_(<2 x s64>) = COPY $q3
+    %0:_(<8 x s64>) = G_CONCAT_VECTORS %2(<2 x s64>), %3(<2 x s64>), %4(<2 x s64>), %5(<2 x s64>)
+    %1:_(p0) = COPY $x0
+    %6:_(<8 x s32>) = G_FPTRUNC %0(<8 x s64>)
+    %7:_(p0) = COPY $x0
+    G_STORE %6(<8 x s32>), %7(p0) :: (store 32)
+    RET_ReallyLR
+...

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 9ab724767707..67eba3f4e307 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -3,6 +3,7 @@
 ; RUN: -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | \
 ; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK
 
+; FALLBACK-NOT: remark{{.*}}fcvtas_2s
 define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
 ;CHECK-LABEL: fcvtas_2s:
 ;CHECK-NOT: ld1
@@ -12,6 +13,7 @@ define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind {
 	ret <2 x i32> %tmp3
 }
 
+; FALLBACK-NOT: remark{{.*}}fcvtas_4s
 define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
 ;CHECK-LABEL: fcvtas_4s:
 ;CHECK-NOT: ld1
@@ -21,6 +23,7 @@ define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind {
 	ret <4 x i32> %tmp3
 }
 
+; FALLBACK-NOT: remark{{.*}}fcvtas_2d
 define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind {
 ;CHECK-LABEL: fcvtas_2d:
 ;CHECK-NOT: ld1


        


More information about the llvm-commits mailing list