[PATCH] D86789: [DAGCombiner] Fold an AND of a masked load into a zext_masked_load
Sam Tebbs via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 1 06:55:21 PDT 2020
samtebbs added inline comments.
================
Comment at: llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll:2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs -o - %s | FileCheck %s
+
----------------
samtebbs wrote:
> samtebbs wrote:
> > dmgreen wrote:
> > > It can be good to show before and after in the tests, to make the differences clearer.
> > I've added extra checks to show what was generated before.
> It turns out I misunderstood. Here is the codegen difference with and without this patch: {F12829520}
```
diff --git a/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll
index 5db6637ca81..9696827d846 100644
--- a/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll
@@ -7,10 +7,8 @@ define arm_aapcs_vfpcc <4 x float> @foo_v4i16(<4 x i16>* nocapture readonly %pSr
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: vpt.s32 lt, q0, zr
; CHECK-NEXT: vldrht.u32 q0, [r0]
-; CHECK-NEXT: vmovlb.u16 q0, q0
; CHECK-NEXT: vcvt.f32.u32 q0, q0
; CHECK-NEXT: bx lr
-; CHECK-OLD-NEXT: vmovlb.u16 q0, q0
entry:
%active.lane.mask = icmp slt <4 x i16> %a, zeroinitializer
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %pSrc, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
@@ -24,10 +22,8 @@ define arm_aapcs_vfpcc <8 x half> @foo_v8i8(<8 x i8>* nocapture readonly %pSrc,
; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: vpt.s16 lt, q0, zr
; CHECK-NEXT: vldrbt.u16 q0, [r0]
-; CHECK-NEXT: vmovlb.u8 q0, q0
; CHECK-NEXT: vcvt.f16.u16 q0, q0
; CHECK-NEXT: bx lr
-; CHECK-OLD-NEXT: vmovlb.u8 q0, q0
entry:
%active.lane.mask = icmp slt <8 x i8> %a, zeroinitializer
%wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %pSrc, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef)
@@ -39,15 +35,11 @@ define arm_aapcs_vfpcc <4 x float> @foo_v4i8(<4 x i8>* nocapture readonly %pSrc,
; CHECK-LABEL: foo_v4i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s8 q0, q0
-; CHECK-NEXT: vmov.i32 q1, #0xff
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: vpt.s32 lt, q0, zr
; CHECK-NEXT: vldrbt.u32 q0, [r0]
-; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vcvt.f32.u32 q0, q0
; CHECK-NEXT: bx lr
-; CHECK-OLD-NEXT: vmov.i32 q1, #0xff
-; CHECK-OLD-NEXT: vand q0, q0, q1
entry:
%active.lane.mask = icmp slt <4 x i8> %a, zeroinitializer
%wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %pSrc, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef)
```
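For reference, a minimal standalone IR reproducer of the pattern being folded might look like the sketch below (this is not part of the patch's test file; it mirrors `foo_v4i16` but returns the `zext` directly instead of converting to float). With the fold, the zero-extend should be absorbed into the `vldrht.u32` inside the VPT block, so no separate `vmovlb.u16` should be emitted.

```
; Sketch only - same RUN line as the test:
;   llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs -o - %s

define arm_aapcs_vfpcc <4 x i32> @zext_of_masked_load(<4 x i16>* %pSrc, <4 x i16> %a) {
entry:
  %active.lane.mask = icmp slt <4 x i16> %a, zeroinitializer
  %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %pSrc, i32 2, <4 x i1> %active.lane.mask, <4 x i16> undef)
  %ext = zext <4 x i16> %wide.masked.load to <4 x i32>
  ret <4 x i32> %ext
}

declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
```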
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86789/new/
https://reviews.llvm.org/D86789