[llvm] 5a333dc - [AArch64][GlobalISel] Improve legalization for odd-type G_LOAD

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 28 17:20:29 PDT 2021


Author: Jessica Paquette
Date: 2021-07-28T17:19:14-07:00
New Revision: 5a333dc5da9fac3407b78d52ad3bc18049f8a73b

URL: https://github.com/llvm/llvm-project/commit/5a333dc5da9fac3407b78d52ad3bc18049f8a73b
DIFF: https://github.com/llvm/llvm-project/commit/5a333dc5da9fac3407b78d52ad3bc18049f8a73b.diff

LOG: [AArch64][GlobalISel] Improve legalization for odd-type G_LOAD

Swap the order of the widening and clamping rules so that we widen to the next
power of 2 first when legalizing G_LOAD.

Also, provide a minimum type for the power-of-2 widening to disallow s1 and s2.
Clamping alone ought to disallow them, but I think it's better to be explicit
about the expected minimum size.

We probably need a similar change for G_STORE, but it seems to be a bit more
finicky. So, let's just handle G_LOAD for now.
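
To make the widening rule concrete, here is a minimal standalone sketch of the
rounding it requests (plain C++20, not the LLVM API; widenToNextPow2 is a
hypothetical helper written for illustration): round the scalar width up to the
next power of 2, with an explicit 8-bit floor so s1 and s2 can never come out.

    // Hypothetical helper mimicking widenScalarToNextPow2(0, /* MinSize = */8).
    // Illustration only; this is not LLVM code.
    #include <algorithm>
    #include <bit>
    #include <cstdint>
    #include <cstdio>

    static uint64_t widenToNextPow2(uint64_t Bits, uint64_t MinSize) {
      // Next power of 2 >= Bits, but never below MinSize.
      return std::max<uint64_t>(std::bit_ceil(Bits), MinSize);
    }

    int main() {
      for (uint64_t Bits : {1, 2, 88, 64})
        std::printf("s%llu -> s%llu\n",
                    static_cast<unsigned long long>(Bits),
                    static_cast<unsigned long long>(widenToNextPow2(Bits, 8)));
      // Prints: s1 -> s8, s2 -> s8, s88 -> s128, s64 -> s64.
    }

Running this rule before lowerIfMemSizeNotPow2 and clampScalar means the value
type is already a power of 2 (and at least s8) by the time the odd memory size
is broken up, which is what the s88 tests below exercise.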

Differential Revision: https://reviews.llvm.org/D107013

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 81f3702b8299..3dd82ec6c136 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -300,9 +300,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                                  {v2s64, p0, s128, 8}})
       // These extends are also legal
       .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
-      .clampScalar(0, s8, s64)
+      .widenScalarToNextPow2(0, /* MinSize = */8)
       .lowerIfMemSizeNotPow2()
-      .widenScalarToNextPow2(0)
+      .clampScalar(0, s8, s64)
       .narrowScalarIf([=](const LegalityQuery &Query) {
         // Clamp extending load results to 32-bits.
         return Query.Types[0].isScalar() &&

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 89e273e12ca4..9b37f664e53d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -588,3 +588,113 @@ body:             |
     RET_ReallyLR
 
 ...
+---
+name:            load_store_s88_s88_mem_size
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: load_store_s88_s88_mem_size
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 16)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
+    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 8, align 8)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 10, align 2)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C3]], [[C4]]
+    ; CHECK: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C4]], [[C3]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[C3]](s64), [[C4]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32)
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[C3]](s64), [[C]]
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[C3]](s64)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV]], [[SUB1]](s64)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[DEF1]], [[C3]](s64)
+    ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[MV]], [[SUB]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[SHL]], [[C]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[OR]], [[SHL2]]
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC1]](s1), [[DEF1]], [[SELECT1]]
+    ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[SELECT]], [[ZEXTLOAD]]
+    ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SELECT2]], [[C]]
+    ; CHECK: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[C4]], [[C4]]
+    ; CHECK: [[SUB3:%[0-9]+]]:_(s64) = G_SUB [[C4]], [[C4]]
+    ; CHECK: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[C4]](s64), [[C4]]
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP2]](s32)
+    ; CHECK: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[C4]](s64), [[C]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP3]](s32)
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[C4]](s64)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[OR1]], [[SUB3]](s64)
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[OR2]], [[C4]](s64)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL4]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[SUB2]](s64)
+    ; CHECK: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[SHL3]], [[C]]
+    ; CHECK: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[OR3]], [[SHL5]]
+    ; CHECK: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC3]](s1), [[OR2]], [[SELECT4]]
+    ; CHECK: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT3]], [[LOAD]]
+    ; CHECK: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[C]]
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[OR4]](s64)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[OR5]](s64)
+    ; CHECK: G_STORE [[COPY]](s64), %ptr(p0) :: (store (s64), align 16)
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C1]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC4]](s32)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C3]](s64)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD2]], [[C2]](s64)
+    ; CHECK: G_STORE [[COPY1]](s32), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 8, align 8)
+    ; CHECK: G_STORE [[LSHR2]](s32), [[PTR_ADD3]](p0) :: (store (s8) into unknown-address + 10, align 2)
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %load:_(s88) = G_LOAD %ptr(p0) :: (load (s88))
+    G_STORE %load(s88), %ptr(p0) :: (store (s88))
+    RET_ReallyLR
+...
+---
+name:            load_store_s88_s64_mem_size
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: load_store_s88_s64_mem_size
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64))
+    ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: G_STORE [[LOAD]](s64), %ptr(p0) :: (store (s64))
+    ; CHECK: RET_ReallyLR
+    %ptr:_(p0) = COPY $x0
+    %load:_(s88) = G_LOAD %ptr(p0) :: (load (s64))
+    G_STORE %load(s88), %ptr(p0) :: (store (s64))
+    RET_ReallyLR
+...
+---
+name:            load_s1
+alignment:       4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: load_s1
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD %ptr(p0) :: (load (s8))
+    ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[LOAD]], 1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ASSERT_ZEXT]](s8)
+    ; CHECK: %ext:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %ptr:_(p0) = COPY $x0
+    %load:_(s1) = G_LOAD %ptr(p0) :: (load (s1))
+    %ext:_(s64) = G_ZEXT %load
+    $x0 = COPY %ext
+    RET_ReallyLR implicit $x0
