[llvm] ae2b36e - [AArch64][GlobalISel] Support truncstorei8/i16 w/ combine to form truncating G_STOREs.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue May 11 11:33:08 PDT 2021


Author: Amara Emerson
Date: 2021-05-11T11:33:03-07:00
New Revision: ae2b36e8bdfa612649c6f2d8b6b9079679cb2572

URL: https://github.com/llvm/llvm-project/commit/ae2b36e8bdfa612649c6f2d8b6b9079679cb2572
DIFF: https://github.com/llvm/llvm-project/commit/ae2b36e8bdfa612649c6f2d8b6b9079679cb2572.diff

LOG: [AArch64][GlobalISel] Support truncstorei8/i16 w/ combine to form truncating G_STOREs.

This needs some TableGen changes so that we can actually import the truncating-store patterns properly.

Differential Revision: https://reviews.llvm.org/D102204
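
For context, the end state this patch enables is that a narrow store of a wider scalar is represented as a single truncating G_STORE (a G_STORE whose memory operand is smaller than the stored value's type), instead of a G_TRUNC feeding a plain G_STORE. Below is a minimal sketch of building such a store directly with MachineIRBuilder; the helper name and setup are hypothetical and not part of this patch.

  // Hypothetical sketch: store an s32 value as a single byte via a
  // truncating G_STORE. Assumes B is a MachineIRBuilder already positioned
  // at the insertion point, Val is an s32 vreg and Ptr is a p0 vreg.
  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"
  #include "llvm/CodeGen/Register.h"
  #include "llvm/Support/Alignment.h"

  using namespace llvm;

  static void buildTruncStoreI8(MachineIRBuilder &B, Register Val, Register Ptr) {
    MachineFunction &MF = B.getMF();
    // A 1-byte memory operand attached to a wider stored value is what makes
    // this a truncating store at the G_STORE level.
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOStore, /*Size=*/1, Align(1));
    B.buildStore(Val, Ptr, *MMO);
  }

The legalizer and selector changes below are what make these wider-value, narrower-memory combinations legal and selectable on AArch64.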

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir

Modified: 
    llvm/include/llvm/Target/TargetSelectionDAG.td
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
    llvm/utils/TableGen/GlobalISelEmitter.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 314c6745b9984..1913396609fd0 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1114,16 +1114,19 @@ def truncstorei8 : PatFrag<(ops node:$val, node:$ptr),
                            (truncstore node:$val, node:$ptr)> {
   let IsStore = true;
   let MemoryVT = i8;
+  let IsTruncStore = true;
 }
 def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {
   let IsStore = true;
   let MemoryVT = i16;
+  let IsTruncStore = true;
 }
 def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {
   let IsStore = true;
   let MemoryVT = i32;
+  let IsTruncStore = true;
 }
 def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr)> {

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index a8dd6262771ad..54e8959ec94a6 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -182,6 +182,14 @@ def lower_vector_fcmp : GICombineRule<
     [{ return lowerVectorFCMP(*${root}, MRI, B); }]),
   (apply [{}])>;
 
+def form_truncstore_matchdata : GIDefMatchData<"Register">;
+def form_truncstore : GICombineRule<
+  (defs root:$root, form_truncstore_matchdata:$matchinfo),
+  (match (wip_match_opcode G_STORE):$root,
+          [{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
@@ -189,7 +197,7 @@ def AArch64PostLegalizerLoweringHelper
     : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
                        [shuffle_vector_lowering, vashr_vlshr_imm,
                         icmp_lowering, build_vector_lowering,
-                        lower_vector_fcmp]> {
+                        lower_vector_fcmp, form_truncstore]> {
   let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
 }
 

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d6cc93fa9d3a2..234fe533c3a59 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -306,11 +306,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_STORE)
       .legalForTypesWithMemDesc({{s8, p0, 8, 8},
+                                 {s16, p0, 8, 8}, // truncstorei8 from s16
+                                 {s32, p0, 8, 8}, // truncstorei8 from s32
+                                 {s64, p0, 8, 8}, // truncstorei8 from s64
                                  {s16, p0, 16, 8},
+                                 {s32, p0, 16, 8}, // truncstorei16 from s32
+                                 {s64, p0, 16, 8}, // truncstorei16 from s64
                                  {s32, p0, 8, 8},
                                  {s32, p0, 16, 8},
                                  {s32, p0, 32, 8},
                                  {s64, p0, 64, 8},
+                                 {s64, p0, 32, 8}, // truncstorei32 from s64
                                  {p0, p0, 64, 8},
                                  {s128, p0, 128, 8},
                                  {v16s8, p0, 128, 8},

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 35cf2a9b3c27d..d6cd1c46b2f81 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -951,6 +951,27 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
   return false;
 }
 
+static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                Register &SrcReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE);
+  Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.getType(DstReg).isVector())
+    return false;
+  // Match a store of a truncate.
+  return mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg)));
+}
+
+static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineIRBuilder &B,
+                                GISelChangeObserver &Observer,
+                                Register &SrcReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE);
+  Observer.changingInstr(MI);
+  MI.getOperand(0).setReg(SrcReg);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
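
As a side note, the match step above is a straightforward pattern match on the stored value. A standalone sketch of the same check, with hypothetical names and mirroring matchFormTruncstore rather than replacing it, would look like this:

  // Given a scalar G_STORE, check whether its stored value comes from a
  // G_TRUNC and, if so, recover the wide source register so the store can
  // use it directly.
  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  using namespace llvm;
  using namespace llvm::MIPatternMatch;

  static bool storedValueIsTrunc(const MachineInstr &Store,
                                 const MachineRegisterInfo &MRI,
                                 Register &WideSrc) {
    Register StoredVal = Store.getOperand(0).getReg();
    // Vector truncating stores are deliberately left alone by the combine.
    if (MRI.getType(StoredVal).isVector())
      return false;
    return mi_match(StoredVal, MRI, m_GTrunc(m_Reg(WideSrc)));
  }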

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index c1ace198f1724..90c117affc76c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -888,22 +888,18 @@ define void @atomc_store(i32* %p) #0 {
 define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    add x8, x0, w1, sxtw
-; CHECK-NOLSE-O1-NEXT:    sub x9, x0, #256 ; =256
-; CHECK-NOLSE-O1-NEXT:    add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, #4095]
+; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-NOLSE-O1-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT:    strb w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    strb w2, [x9]
-; CHECK-NOLSE-O1-NEXT:    strb w2, [x10]
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
 ; CHECK-NOLSE-O0:       ; %bb.0:
 ; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, #4095]
-; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw
-; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
-; CHECK-NOLSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-NOLSE-O0-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
 ; CHECK-NOLSE-O0-NEXT:    ret
@@ -911,10 +907,8 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
 ; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
 ; CHECK-LSE-O1:       ; %bb.0:
 ; CHECK-LSE-O1-NEXT:    strb w2, [x0, #4095]
-; CHECK-LSE-O1-NEXT:    add x8, x0, w1, sxtw
-; CHECK-LSE-O1-NEXT:    strb w2, [x8]
-; CHECK-LSE-O1-NEXT:    sub x8, x0, #256 ; =256
-; CHECK-LSE-O1-NEXT:    strb w2, [x8]
+; CHECK-LSE-O1-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-LSE-O1-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT:    strb w2, [x8]
 ; CHECK-LSE-O1-NEXT:    ret
@@ -922,10 +916,8 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
 ; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8:
 ; CHECK-LSE-O0:       ; %bb.0:
 ; CHECK-LSE-O0-NEXT:    strb w2, [x0, #4095]
-; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw
-; CHECK-LSE-O0-NEXT:    strb w2, [x8]
-; CHECK-LSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-LSE-O0-NEXT:    strb w2, [x8]
+; CHECK-LSE-O0-NEXT:    strb w2, [x0, w1, sxtw]
+; CHECK-LSE-O0-NEXT:    sturb w2, [x0, #-256]
 ; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O0-NEXT:    strb w2, [x8]
 ; CHECK-LSE-O0-NEXT:    ret
@@ -947,22 +939,18 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
 define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O1-NEXT:    sub x9, x0, #256 ; =256
-; CHECK-NOLSE-O1-NEXT:    add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, #8190]
+; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O1-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT:    strh w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    strh w2, [x9]
-; CHECK-NOLSE-O1-NEXT:    strh w2, [x10]
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
 ; CHECK-NOLSE-O0:       ; %bb.0:
 ; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, #8190]
-; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
-; CHECK-NOLSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
+; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O0-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
 ; CHECK-NOLSE-O0-NEXT:    ret
@@ -970,10 +958,8 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
 ; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
 ; CHECK-LSE-O1:       ; %bb.0:
 ; CHECK-LSE-O1-NEXT:    strh w2, [x0, #8190]
-; CHECK-LSE-O1-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-LSE-O1-NEXT:    strh w2, [x8]
-; CHECK-LSE-O1-NEXT:    sub x8, x0, #256 ; =256
-; CHECK-LSE-O1-NEXT:    strh w2, [x8]
+; CHECK-LSE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-LSE-O1-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT:    strh w2, [x8]
 ; CHECK-LSE-O1-NEXT:    ret
@@ -981,10 +967,8 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
 ; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16:
 ; CHECK-LSE-O0:       ; %bb.0:
 ; CHECK-LSE-O0-NEXT:    strh w2, [x0, #8190]
-; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw #1
-; CHECK-LSE-O0-NEXT:    strh w2, [x8]
-; CHECK-LSE-O0-NEXT:    subs x8, x0, #256 ; =256
-; CHECK-LSE-O0-NEXT:    strh w2, [x8]
+; CHECK-LSE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
+; CHECK-LSE-O0-NEXT:    sturh w2, [x0, #-256]
 ; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O0-NEXT:    strh w2, [x8]
 ; CHECK-LSE-O0-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 5acfbce1825be..797adc5acb687 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -491,3 +491,29 @@ body:             |
     %val:_(<4 x s64>) = G_LOAD %ptr(p0) :: (load 32)
     G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32)
     RET_ReallyLR
+...
+---
+name:            test_trunc_store
+body: |
+  bb.0:
+    liveins: $x0, $w1
+
+    ; CHECK-LABEL: name: test_trunc_store
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK: %val64:_(s64) = COPY $x2
+    ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 1)
+    ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 2)
+    ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 1)
+    ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 2)
+    ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 4)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = COPY $w1
+    %2:_(s8) = G_TRUNC %1(s32)
+    %val64:_(s64) = COPY $x2
+    G_STORE %1(s32), %0(p0) :: (store 1)
+    G_STORE %1(s32), %0(p0) :: (store 2)
+    G_STORE %val64(s64), %0(p0) :: (store 1)
+    G_STORE %val64(s64), %0(p0) :: (store 2)
+    G_STORE %val64(s64), %0(p0) :: (store 4)
+...

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
new file mode 100644
index 0000000000000..636e30613705d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            truncstore_s8
+legalized:       true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: truncstore_s8
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %val:_(s32) = COPY $w1
+    ; CHECK: G_STORE %val(s32), %ptr(p0) :: (store 1)
+    %ptr:_(p0) = COPY $x0
+    %val:_(s32) = COPY $w1
+    %trunc:_(s8) = G_TRUNC %val
+    G_STORE %trunc(s8), %ptr(p0) :: (store 1)
+...
+---
+name:            truncstore_vector
+legalized:       true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: truncstore_vector
+    ; CHECK: %ptr:_(p0) = COPY $x0
+    ; CHECK: %val:_(<4 x s32>) = COPY $q0
+    ; CHECK: %trunc:_(<4 x s8>) = G_TRUNC %val(<4 x s32>)
+    ; CHECK: G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
+    %ptr:_(p0) = COPY $x0
+    %val:_(<4 x s32>) = COPY $q0
+    %trunc:_(<4 x s8>) = G_TRUNC %val
+    G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
+...

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
index 5bbd2a73c14ea..a414bf2c55d12 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
 
 --- |
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -43,6 +43,8 @@
   define void @store_adrp_add_low() { ret void }
   define void @store_adrp_add_low_foldable_offset() { ret void }
   define void @store_adrp_add_low_unfoldable_offset() { ret void }
+
+  define void @truncstores(i8* %addr) { ret void }
 ...
 
 ---
@@ -663,3 +665,50 @@ body:             |
     %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3
     %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3
     G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
+...
+
+---
+name:            truncstores
+legalized:       true
+regBankSelected: true
+body:             |
+  bb.0:
+    liveins: $x0, $w1, $x2
+
+    ; CHECK-LABEL: name: truncstores
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: %val32:gpr32 = COPY $w1
+    ; CHECK: %val64:gpr64 = COPY $x2
+    ; CHECK: STRBBui %val32, [[COPY]], 0 :: (store 1)
+    ; CHECK: STRBBui %val32, [[COPY]], 43 :: (store 1)
+    ; CHECK: STRHHui %val32, [[COPY]], 0 :: (store 2)
+    ; CHECK: STURHHi %val32, [[COPY]], 43 :: (store 2)
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2)
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STURHHi [[COPY2]], [[COPY]], 43 :: (store 2)
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STRWui [[COPY3]], [[COPY]], 0 :: (store 4)
+    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+    ; CHECK: STURWi [[COPY4]], [[COPY]], 43 :: (store 4)
+    %0:gpr(p0) = COPY $x0
+    %val32:gpr(s32) = COPY $w1
+    %val64:gpr(s64) = COPY $x2
+    G_STORE %val32, %0 :: (store 1)
+    ; unscaled offset:
+    %cst:gpr(s64) = G_CONSTANT i64 43
+    %newptr:gpr(p0) = G_PTR_ADD %0, %cst
+    G_STORE %val32, %newptr :: (store 1)
+
+    G_STORE %val32, %0 :: (store 2)
+    ; unscaled offset:
+    G_STORE %val32, %newptr :: (store 2)
+
+    G_STORE %val64, %0 :: (store 2)
+    ; unscaled offset:
+    G_STORE %val64, %newptr :: (store 2)
+
+    G_STORE %val64, %0 :: (store 4)
+    ; unscaled offset:
+    G_STORE %val64, %newptr :: (store 4)
+...

diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 9a869de138f3d..06513e8ccdec6 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3657,6 +3657,10 @@ class GlobalISelEmitter {
   Optional<const CodeGenRegisterClass *>
   inferRegClassFromPattern(TreePatternNode *N);
 
+  /// Return the size of the MemoryVT in this predicate, if possible.
+  Optional<unsigned>
+  getMemSizeBitsFromPredicate(const TreePredicateFn &Predicate);
+
   // Add builtin predicates.
   Expected<InstructionMatcher &>
   addBuiltinPredicates(const Record *SrcGIEquivOrNull,
@@ -3769,6 +3773,17 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M,
   return Error::success();
 }
 
+Optional<unsigned> GlobalISelEmitter::getMemSizeBitsFromPredicate(const TreePredicateFn &Predicate) {
+  Optional<LLTCodeGen> MemTyOrNone =
+      MVTToLLT(getValueType(Predicate.getMemoryVT()));
+
+  if (!MemTyOrNone)
+    return None;
+
+  // Align so unusual types like i1 don't get rounded down.
+  return llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
+}
+
 Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
     const Record *SrcGIEquivOrNull, const TreePredicateFn &Predicate,
     InstructionMatcher &InsnMatcher, bool &HasAddedMatcher) {
@@ -3808,9 +3823,18 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
 
   if (Predicate.isStore()) {
     if (Predicate.isTruncStore()) {
-      // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
-      InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
-          0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      if (Predicate.getMemoryVT() != nullptr) {
+        // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+        auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
+        if (!MemSizeInBits)
+          return failedImport("MemVT could not be converted to LLT");
+
+        InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0, *MemSizeInBits /
+                                                                    8);
+      } else {
+        InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+            0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      }
       return InsnMatcher;
     }
     if (Predicate.isNonTruncStore()) {
@@ -3837,19 +3861,12 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
 
   if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
     if (Predicate.getMemoryVT() != nullptr) {
-      Optional<LLTCodeGen> MemTyOrNone =
-          MVTToLLT(getValueType(Predicate.getMemoryVT()));
-
-      if (!MemTyOrNone)
+      auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
+      if (!MemSizeInBits)
         return failedImport("MemVT could not be converted to LLT");
 
-      // MMO's work in bytes so we must take care of unusual types like i1
-      // don't round down.
-      unsigned MemSizeInBits =
-          llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
-
       InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0,
-                                                           MemSizeInBits / 8);
+                                                           *MemSizeInBits / 8);
       return InsnMatcher;
     }
   }
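
One subtle point in the emitter change is the alignTo call in getMemSizeBitsFromPredicate: memory operand sizes are in bytes, so a sub-byte memory type such as i1 has to be rounded up before dividing by 8. A tiny standalone check of that arithmetic (not part of the patch) is:

  // Why the bit width is aligned up before dividing by 8: a 1-bit memory
  // type would otherwise yield a 0-byte size in the emitted predicate.
  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  int main() {
    assert(llvm::alignTo(1, 8) / 8 == 1);  // i1  -> 1 byte
    assert(llvm::alignTo(8, 8) / 8 == 1);  // i8  -> 1 byte
    assert(llvm::alignTo(16, 8) / 8 == 2); // i16 -> 2 bytes
    return 0;
  }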


        

