[llvm] ae2b36e - [AArch64][GlobalISel] Support truncstorei8/i16 w/ combine to form truncating G_STOREs.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue May 11 11:33:08 PDT 2021
Author: Amara Emerson
Date: 2021-05-11T11:33:03-07:00
New Revision: ae2b36e8bdfa612649c6f2d8b6b9079679cb2572
URL: https://github.com/llvm/llvm-project/commit/ae2b36e8bdfa612649c6f2d8b6b9079679cb2572
DIFF: https://github.com/llvm/llvm-project/commit/ae2b36e8bdfa612649c6f2d8b6b9079679cb2572.diff
LOG: [AArch64][GlobalISel] Support truncstorei8/i16 w/ combine to form truncating G_STOREs.
This needs some tablegen changes so that we can actually import the patterns properly.
Differential Revision: https://reviews.llvm.org/D102204
Added:
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
Modified:
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
llvm/utils/TableGen/GlobalISelEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 314c6745b9984..1913396609fd0 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1114,16 +1114,19 @@ def truncstorei8 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = true;
let MemoryVT = i8;
+ let IsTruncStore = true;
}
def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = true;
let MemoryVT = i16;
+ let IsTruncStore = true;
}
def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = true;
let MemoryVT = i32;
+ let IsTruncStore = true;
}
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index a8dd6262771ad..54e8959ec94a6 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -182,6 +182,14 @@ def lower_vector_fcmp : GICombineRule<
[{ return lowerVectorFCMP(*${root}, MRI, B); }]),
(apply [{}])>;
+// Fold a scalar G_TRUNC that feeds the value operand of a G_STORE into the
+// store itself, leaving a truncating G_STORE of the wider source value.
+// matchFormTruncstore records the G_TRUNC's source register in $matchinfo and
+// applyFormTruncstore rewrites the store's value operand to use it.
+def form_truncstore_matchdata : GIDefMatchData<"Register">;
+def form_truncstore : GICombineRule<
+ (defs root:$root, form_truncstore_matchdata:$matchinfo),
+ (match (wip_match_opcode G_STORE):$root,
+ [{ return matchFormTruncstore(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyFormTruncstore(*${root}, MRI, B, Observer, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -189,7 +197,7 @@ def AArch64PostLegalizerLoweringHelper
: GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
[shuffle_vector_lowering, vashr_vlshr_imm,
icmp_lowering, build_vector_lowering,
- lower_vector_fcmp]> {
+ lower_vector_fcmp, form_truncstore]> {
let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d6cc93fa9d3a2..234fe533c3a59 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -306,11 +306,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_STORE)
.legalForTypesWithMemDesc({{s8, p0, 8, 8},
+ {s16, p0, 8, 8}, // truncstorei8 from s16
+ {s32, p0, 8, 8}, // truncstorei8 from s32
+ {s64, p0, 8, 8}, // truncstorei8 from s64
{s16, p0, 16, 8},
+ {s32, p0, 16, 8}, // truncstorei16 from s32
+ {s64, p0, 16, 8}, // truncstorei16 from s64
{s32, p0, 8, 8},
{s32, p0, 16, 8},
{s32, p0, 32, 8},
{s64, p0, 64, 8},
+ {s64, p0, 32, 8}, // truncstorei32 from s64
{p0, p0, 64, 8},
{s128, p0, 128, 8},
{v16s8, p0, 128, 8},
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 35cf2a9b3c27d..d6cd1c46b2f81 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -951,6 +951,27 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
return false;
}
+/// Match a scalar G_STORE whose stored value is produced by a G_TRUNC, so that
+/// the store can consume the wider source directly as a truncating store.
+///
+/// \param MI The G_STORE being inspected.
+/// \param MRI Used to query register types and to look through to the
+///        defining G_TRUNC.
+/// \param [out] SrcReg Set to the G_TRUNC's source register on a match.
+/// \returns true if the store may be rewritten to store \p SrcReg.
+static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                Register &SrcReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE);
+  Register DstReg = MI.getOperand(0).getReg();
+  // Vector stores are left alone by this combine.
+  if (MRI.getType(DstReg).isVector())
+    return false;
+  // Match a store of a truncate.
+  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
+    return false;
+  // Only form truncstores from sources of at most 64 bits: the legalizer only
+  // lists truncating stores from s16/s32/s64 values, so folding a truncate
+  // from a wider type (e.g. s128) would leave a store nothing can select.
+  return MRI.getType(SrcReg).getSizeInBits() <= 64;
+}
+
+/// Rewrite the matched G_STORE in place so that it stores \p SrcReg (the
+/// G_TRUNC source recorded by the matcher) instead of the truncated value.
+/// Only the value operand is replaced; \p Observer is notified before and
+/// after the mutation. Always returns true.
+static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineIRBuilder &B,
+                                GISelChangeObserver &Observer,
+                                Register &SrcReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_STORE);
+  MachineOperand &StoredValue = MI.getOperand(0);
+  Observer.changingInstr(MI);
+  StoredValue.setReg(SrcReg);
+  Observer.changedInstr(MI);
+  return true;
+}
+
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index c1ace198f1724..90c117affc76c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -888,22 +888,18 @@ define void @atomc_store(i32* %p) #0 {
define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: add x8, x0, w1, sxtw
-; CHECK-NOLSE-O1-NEXT: sub x9, x0, #256 ; =256
-; CHECK-NOLSE-O1-NEXT: add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT: strb w2, [x0, #4095]
+; CHECK-NOLSE-O1-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-NOLSE-O1-NEXT: sturb w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: strb w2, [x8]
-; CHECK-NOLSE-O1-NEXT: strb w2, [x9]
-; CHECK-NOLSE-O1-NEXT: strb w2, [x10]
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: strb w2, [x0, #4095]
-; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw
-; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
-; CHECK-NOLSE-O0-NEXT: subs x8, x0, #256 ; =256
-; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
+; CHECK-NOLSE-O0-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-NOLSE-O0-NEXT: sturb w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT: strb w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
@@ -911,10 +907,8 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: strb w2, [x0, #4095]
-; CHECK-LSE-O1-NEXT: add x8, x0, w1, sxtw
-; CHECK-LSE-O1-NEXT: strb w2, [x8]
-; CHECK-LSE-O1-NEXT: sub x8, x0, #256 ; =256
-; CHECK-LSE-O1-NEXT: strb w2, [x8]
+; CHECK-LSE-O1-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-LSE-O1-NEXT: sturb w2, [x0, #-256]
; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: strb w2, [x8]
; CHECK-LSE-O1-NEXT: ret
@@ -922,10 +916,8 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: strb w2, [x0, #4095]
-; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw
-; CHECK-LSE-O0-NEXT: strb w2, [x8]
-; CHECK-LSE-O0-NEXT: subs x8, x0, #256 ; =256
-; CHECK-LSE-O0-NEXT: strb w2, [x8]
+; CHECK-LSE-O0-NEXT: strb w2, [x0, w1, sxtw]
+; CHECK-LSE-O0-NEXT: sturb w2, [x0, #-256]
; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT: strb w2, [x8]
; CHECK-LSE-O0-NEXT: ret
@@ -947,22 +939,18 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O1-NEXT: sub x9, x0, #256 ; =256
-; CHECK-NOLSE-O1-NEXT: add x10, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT: strh w2, [x0, #8190]
+; CHECK-NOLSE-O1-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O1-NEXT: sturh w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: strh w2, [x8]
-; CHECK-NOLSE-O1-NEXT: strh w2, [x9]
-; CHECK-NOLSE-O1-NEXT: strh w2, [x10]
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O0: ; %bb.0:
; CHECK-NOLSE-O0-NEXT: strh w2, [x0, #8190]
-; CHECK-NOLSE-O0-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
-; CHECK-NOLSE-O0-NEXT: subs x8, x0, #256 ; =256
-; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
+; CHECK-NOLSE-O0-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O0-NEXT: sturh w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT: strh w2, [x8]
; CHECK-NOLSE-O0-NEXT: ret
@@ -970,10 +958,8 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: strh w2, [x0, #8190]
-; CHECK-LSE-O1-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-LSE-O1-NEXT: strh w2, [x8]
-; CHECK-LSE-O1-NEXT: sub x8, x0, #256 ; =256
-; CHECK-LSE-O1-NEXT: strh w2, [x8]
+; CHECK-LSE-O1-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-LSE-O1-NEXT: sturh w2, [x0, #-256]
; CHECK-LSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: strh w2, [x8]
; CHECK-LSE-O1-NEXT: ret
@@ -981,10 +967,8 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O0: ; %bb.0:
; CHECK-LSE-O0-NEXT: strh w2, [x0, #8190]
-; CHECK-LSE-O0-NEXT: add x8, x0, w1, sxtw #1
-; CHECK-LSE-O0-NEXT: strh w2, [x8]
-; CHECK-LSE-O0-NEXT: subs x8, x0, #256 ; =256
-; CHECK-LSE-O0-NEXT: strh w2, [x8]
+; CHECK-LSE-O0-NEXT: strh w2, [x0, w1, sxtw #1]
+; CHECK-LSE-O0-NEXT: sturh w2, [x0, #-256]
; CHECK-LSE-O0-NEXT: add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT: strh w2, [x8]
; CHECK-LSE-O0-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 5acfbce1825be..797adc5acb687 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -491,3 +491,29 @@ body: |
%val:_(<4 x s64>) = G_LOAD %ptr(p0) :: (load 32)
G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32)
RET_ReallyLR
+...
+---
+name: test_trunc_store
+body: |
+ bb.0:
+ liveins: $x0, $w1
+
+ ; CHECK-LABEL: name: test_trunc_store
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: %val64:_(s64) = COPY $x2
+ ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 1)
+ ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 2)
+ ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 1)
+ ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 2)
+ ; CHECK: G_STORE %val64(s64), [[COPY]](p0) :: (store 4)
+ %0:_(p0) = COPY $x0
+ %1:_(s32) = COPY $w1
+ %2:_(s8) = G_TRUNC %1(s32)
+ %val64:_(s64) = COPY $x2
+ G_STORE %1(s32), %0(p0) :: (store 1)
+ G_STORE %1(s32), %0(p0) :: (store 2)
+ G_STORE %val64(s64), %0(p0) :: (store 1)
+ G_STORE %val64(s64), %0(p0) :: (store 2)
+ G_STORE %val64(s64), %0(p0) :: (store 4)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
new file mode 100644
index 0000000000000..636e30613705d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-truncstore.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: truncstore_s8
+legalized: true
+body: |
+ bb.0.entry:
+ liveins: $x0
+ ; CHECK-LABEL: name: truncstore_s8
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: %val:_(s32) = COPY $w1
+ ; CHECK: G_STORE %val(s32), %ptr(p0) :: (store 1)
+ %ptr:_(p0) = COPY $x0
+ %val:_(s32) = COPY $w1
+ %trunc:_(s8) = G_TRUNC %val
+ G_STORE %trunc(s8), %ptr(p0) :: (store 1)
+...
+---
+name: truncstore_vector
+legalized: true
+body: |
+ bb.0.entry:
+ liveins: $x0
+ ; CHECK-LABEL: name: truncstore_vector
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: %val:_(<4 x s32>) = COPY $q0
+ ; CHECK: %trunc:_(<4 x s8>) = G_TRUNC %val(<4 x s32>)
+ ; CHECK: G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
+ %ptr:_(p0) = COPY $x0
+ %val:_(<4 x s32>) = COPY $q0
+ %trunc:_(<4 x s8>) = G_TRUNC %val
+ G_STORE %trunc(<4 x s8>), %ptr(p0) :: (store 4)
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
index 5bbd2a73c14ea..a414bf2c55d12 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -43,6 +43,8 @@
define void @store_adrp_add_low() { ret void }
define void @store_adrp_add_low_foldable_offset() { ret void }
define void @store_adrp_add_low_unfoldable_offset() { ret void }
+
+ define void @truncstores(i8* %addr) { ret void }
...
---
@@ -663,3 +665,50 @@ body: |
%adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x + 3
%add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x + 3
G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)
+...
+
+---
+name: truncstores
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $x0, $w1, $x2
+
+ ; CHECK-LABEL: name: truncstores
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: %val32:gpr32 = COPY $w1
+ ; CHECK: %val64:gpr64 = COPY $x2
+ ; CHECK: STRBBui %val32, [[COPY]], 0 :: (store 1)
+ ; CHECK: STRBBui %val32, [[COPY]], 43 :: (store 1)
+ ; CHECK: STRHHui %val32, [[COPY]], 0 :: (store 2)
+ ; CHECK: STURHHi %val32, [[COPY]], 43 :: (store 2)
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+ ; CHECK: STRHHui [[COPY1]], [[COPY]], 0 :: (store 2)
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+ ; CHECK: STURHHi [[COPY2]], [[COPY]], 43 :: (store 2)
+ ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+ ; CHECK: STRWui [[COPY3]], [[COPY]], 0 :: (store 4)
+ ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY %val64.sub_32
+ ; CHECK: STURWi [[COPY4]], [[COPY]], 43 :: (store 4)
+ %0:gpr(p0) = COPY $x0
+ %val32:gpr(s32) = COPY $w1
+ %val64:gpr(s64) = COPY $x2
+ G_STORE %val32, %0 :: (store 1)
+ ; unscaled offset:
+ %cst:gpr(s64) = G_CONSTANT i64 43
+ %newptr:gpr(p0) = G_PTR_ADD %0, %cst
+ G_STORE %val32, %newptr :: (store 1)
+
+ G_STORE %val32, %0 :: (store 2)
+ ; unscaled offset:
+ G_STORE %val32, %newptr :: (store 2)
+
+ G_STORE %val64, %0 :: (store 2)
+ ; unscaled offset:
+ G_STORE %val64, %newptr :: (store 2)
+
+ G_STORE %val64, %0 :: (store 4)
+ ; unscaled offset:
+ G_STORE %val64, %newptr :: (store 4)
+...
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 9a869de138f3d..06513e8ccdec6 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3657,6 +3657,10 @@ class GlobalISelEmitter {
Optional<const CodeGenRegisterClass *>
inferRegClassFromPattern(TreePatternNode *N);
+ /// Return the size of the MemoryVT in this predicate, if possible.
+ Optional<unsigned>
+ getMemSizeBitsFromPredicate(const TreePredicateFn &Predicate);
+
// Add builtin predicates.
Expected<InstructionMatcher &>
addBuiltinPredicates(const Record *SrcGIEquivOrNull,
@@ -3769,6 +3773,17 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M,
return Error::success();
}
+/// Return the size in bits of \p Predicate's MemoryVT, rounded up to a whole
+/// number of bytes, or None if the predicate has no MemoryVT or the type has
+/// no LLT equivalent.
+Optional<unsigned> GlobalISelEmitter::getMemSizeBitsFromPredicate(
+    const TreePredicateFn &Predicate) {
+  // Callers currently test for a MemoryVT before calling, but do not rely on
+  // that here: without one there is no size to report.
+  auto *MemoryVT = Predicate.getMemoryVT();
+  if (!MemoryVT)
+    return None;
+
+  Optional<LLTCodeGen> MemTyOrNone = MVTToLLT(getValueType(MemoryVT));
+  if (!MemTyOrNone)
+    return None;
+
+  // MMOs work in bytes, so round up to a multiple of 8 bits; otherwise
+  // unusual types like i1 would round down to zero bytes when the caller
+  // divides by 8.
+  return llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
+}
+
Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
const Record *SrcGIEquivOrNull, const TreePredicateFn &Predicate,
InstructionMatcher &InsnMatcher, bool &HasAddedMatcher) {
@@ -3808,9 +3823,18 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
if (Predicate.isStore()) {
if (Predicate.isTruncStore()) {
- // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
- InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
- 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+ if (Predicate.getMemoryVT() != nullptr) {
+ // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+ auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
+ if (!MemSizeInBits)
+ return failedImport("MemVT could not be converted to LLT");
+
+ InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0, *MemSizeInBits /
+ 8);
+ } else {
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+ }
return InsnMatcher;
}
if (Predicate.isNonTruncStore()) {
@@ -3837,19 +3861,12 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
if (Predicate.getMemoryVT() != nullptr) {
- Optional<LLTCodeGen> MemTyOrNone =
- MVTToLLT(getValueType(Predicate.getMemoryVT()));
-
- if (!MemTyOrNone)
+ auto MemSizeInBits = getMemSizeBitsFromPredicate(Predicate);
+ if (!MemSizeInBits)
return failedImport("MemVT could not be converted to LLT");
- // MMO's work in bytes so we must take care of unusual types like i1
- // don't round down.
- unsigned MemSizeInBits =
- llvm::alignTo(MemTyOrNone->get().getSizeInBits(), 8);
-
InsnMatcher.addPredicate<MemorySizePredicateMatcher>(0,
- MemSizeInBits / 8);
+ *MemSizeInBits / 8);
return InsnMatcher;
}
}
More information about the llvm-commits
mailing list