[PATCH] D100025: [CodeGen][AArch64] Fix isel crash for truncating FP stores

Wed Apr 7 01:52:46 PDT 2021

david-arm created this revision.
david-arm added reviewers: sdesmalen, CarolineConcatto, joechrisellis.
Herald added subscribers: danielkiss, hiraditya, kristof.beyls.
david-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

When attempting to truncate an FP vector and store the result out
to memory, we crashed because we had no pattern for truncating FP
stores. In fact, we don't support these types of stores, and the
correct fix is to stop marking these truncating stores as legal
(i.e. mark them as Expand for the scalable FP vector types).

Tests have been added here:

  CodeGen/AArch64/sve-fptrunc-store.ll


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100025

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll


Index: llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @fptrunc2_f64_f32(<vscale x 2 x float> *%dst, <vscale x 2 x double> *%src) {
+; CHECK-LABEL: fptrunc2_f64_f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <vscale x 2 x double>, <vscale x 2 x double>* %src, align 8
+  %1 = fptrunc <vscale x 2 x double> %0 to <vscale x 2 x float>
+  store <vscale x 2 x float> %1, <vscale x 2 x float>* %dst, align 4
+  ret void
+}
+
+define void @fptrunc2_f64_f16(<vscale x 2 x half> *%dst, <vscale x 2 x double> *%src) {
+; CHECK-LABEL: fptrunc2_f64_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <vscale x 2 x double>, <vscale x 2 x double>* %src, align 8
+  %1 = fptrunc <vscale x 2 x double> %0 to <vscale x 2 x half>
+  store <vscale x 2 x half> %1, <vscale x 2 x half>* %dst, align 2
+  ret void
+}
+
+define void @fptrunc4_f32_f16(<vscale x 4 x half> *%dst, <vscale x 4 x float> *%src) {
+; CHECK-LABEL: fptrunc4_f32_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x1]
+; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <vscale x 4 x float>, <vscale x 4 x float>* %src, align 8
+  %1 = fptrunc <vscale x 4 x float> %0 to <vscale x 4 x half>
+  store <vscale x 4 x half> %1, <vscale x 4 x half>* %dst, align 2
+  ret void
+}
+
+define void @fptrunc2_f32_f16(<vscale x 2 x half> *%dst, <vscale x 2 x float> *%src) {
+; CHECK-LABEL: fptrunc2_f32_f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x1]
+; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = load <vscale x 2 x float>, <vscale x 2 x float>* %src, align 8
+  %1 = fptrunc <vscale x 2 x float> %0 to <vscale x 2 x half>
+  store <vscale x 2 x half> %1, <vscale x 2 x half>* %dst, align 2
+  ret void
+}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1180,6 +1180,10 @@
 
     for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                     MVT::nxv4f32, MVT::nxv2f64}) {
+      for (auto InnerVT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16,
+                           MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64})
+        setTruncStoreAction(VT, InnerVT, Expand);
+
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::MGATHER, VT, Custom);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100025.335755.patch
Type: text/x-patch
Size: 3554 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210407/332a027a/attachment.bin>