[llvm] 7ff9575 - [AArch64][GlobalISel] Select XRO addressing mode with wide immediates
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 29 11:02:22 PDT 2020
Author: Jessica Paquette
Date: 2020-07-29T11:02:10-07:00
New Revision: 7ff9575594d001f0c514f35add36eaf7b6c5e12f
URL: https://github.com/llvm/llvm-project/commit/7ff9575594d001f0c514f35add36eaf7b6c5e12f
DIFF: https://github.com/llvm/llvm-project/commit/7ff9575594d001f0c514f35add36eaf7b6c5e12f.diff
LOG: [AArch64][GlobalISel] Select XRO addressing mode with wide immediates
Port the wide immediate case from AArch64DAGToDAGISel::SelectAddrModeXRO.
If we have a wide immediate which can't be represented in an add, we can end up
with code like this:
```
mov x0, imm
add x1, base, x0
ldr x2, [x1, 0]
```
If we use the [base, xN] addressing mode instead, we can produce this:
```
mov x0, imm
ldr x2, [base, x0]
```
This saves 0.4% code size on 7zip at -O3, and gives a geomean code size
improvement of 0.1% on CTMark.
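As a rough illustration of what counts as "wide" here: an unsigned scaled-offset load like LDRXui only folds offsets that are non-negative, a multiple of the access size, and below 0x1000 << log2(size), while add/sub immediates are limited to 12 bits (optionally shifted left by 12). A minimal standalone sketch of the load/store range check, with an illustrative helper name (not LLVM API):
```
#include <cassert>
#include <cstdint>

// Sketch of the scaled unsigned-offset check that decides whether an
// offset can fold directly into a load/store such as LDRXui.
// Illustrative only; not the in-tree helper.
bool fitsScaledOffset(int64_t ImmOff, unsigned SizeInBytes) {
  unsigned Scale = __builtin_ctz(SizeInBytes); // log2 of the access size
  return ImmOff >= 0 && ImmOff % SizeInBytes == 0 &&
         ImmOff < (int64_t(0x1000) << Scale);
}

int main() {
  // 16 folds straight into an 8-byte load (LDRXui base, 2).
  assert(fitsScaledOffset(16, 8));
  // 0x111000000 is wide: out of range here and for add/sub, so the
  // [base, xreg] (XRO) form avoids the extra add.
  assert(!fitsScaledOffset(0x111000000, 8));
}
```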
Differential Revision: https://reviews.llvm.org/D84784
Added:
llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 6de5e5ea5b95..b96451e376c4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5083,12 +5083,60 @@ InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const {
MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
-
- // If we have a constant offset, then we probably don't want to match a
- // register offset.
- if (isBaseWithConstantOffset(Root, MRI))
+ if (!Root.isReg())
+ return None;
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd)
return None;
+ // Check for an immediate which cannot be encoded in the [base + imm]
+ // addressing mode and can't be encoded in an add/sub. If this happens, we'll
+ // end up with code like:
+ //
+ // mov x0, wide
+ // add x1, base, x0
+ // ldr x2, [x1, 0]
+ //
+ // In this situation, we can use the [base, xreg] addressing mode to save an
+ // add/sub:
+ //
+ // mov x0, wide
+ // ldr x2, [base, x0]
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ if (ValAndVReg) {
+ unsigned Scale = Log2_32(SizeInBytes);
+ int64_t ImmOff = ValAndVReg->Value;
+
+ // Skip immediates that can be selected in the load/store addressing
+ // mode.
+ if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
+ ImmOff < (0x1000 << Scale))
+ return None;
+
+ // Helper lambda to decide whether or not it is preferable to emit an add.
+ auto isPreferredADD = [](int64_t ImmOff) {
+ // Constants in [0x0, 0xfff] can be encoded in an add.
+ if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
+ return true;
+
+ // Can it be encoded in an add lsl #12?
+ if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
+ return false;
+
+ // It can be encoded in an add lsl #12, but we may not want to. If it is
+ // possible to select this as a single movz, then prefer that. A single
+ // movz is faster than an add with a shift.
+ return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
+ (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
+ };
+
+ // If the immediate can be encoded in a single add/sub, then bail out.
+ if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
+ return None;
+ }
+
// Try to fold shifts into the addressing mode.
auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
if (AddrModeFns)
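As a standalone illustration of the masks in the isPreferredADD lambda above, here is a sketch that mirrors its logic outside of LLVM and checks it against the constants used in the new test (illustrative code, not part of the patch):
```
#include <cassert>
#include <cstdint>

// Mirror of the isPreferredADD lambda, pulled out for illustration.
bool isPreferredADD(int64_t ImmOff) {
  // Bits [11:0] only: a plain add immediate.
  if ((ImmOff & 0xfffffffffffff000LL) == 0)
    return true;
  // Any bit outside [23:12]: not encodable as add ..., lsl #12 either.
  if ((ImmOff & 0xffffffffff000fffLL) != 0)
    return false;
  // Bits confined to [23:12]: prefer the add only when the constant
  // straddles both [15:12] and [23:16]; otherwise a single movz covers it.
  return (ImmOff & 0xffffffffff00ffffLL) != 0 &&
         (ImmOff & 0xffffffffffff0fffLL) != 0;
}

int main() {
  assert(isPreferredADD(17));           // plain add imm; XRO skipped
  assert(isPreferredADD(0x111000));     // add ..., lsl #12; XRO skipped
  assert(!isPreferredADD(0xf000));      // one movz is cheaper; XRO used
  assert(!isPreferredADD(0x111000000)); // no add encoding at all; XRO used
}
```
When neither isPreferredADD(ImmOff) nor isPreferredADD(-ImmOff) holds, selection falls through to the register-offset handling in the context lines above.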
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir
new file mode 100644
index 000000000000..7b2dae2f7e50
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir
@@ -0,0 +1,211 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test using the XRO addressing mode with immediates. This should happen for
+# wide constants that are better materialized with a mov than with an add.
+
+...
+---
+name: use_xro_cannot_encode_add_lsl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Check that we use the XRO addressing mode when the constant cannot be
+ ; represented using an add + lsl.
+ ;
+ ; cst = 0000000111000000
+ ; cst & ffffffffff000fff != 0
+ ;
+ ; CHECK-LABEL: name: use_xro_cannot_encode_add_lsl
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: %cst:gpr64 = MOVi64imm 4580179968
+ ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 4580179968
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: use_xro_preferred_mov
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Check that we use the XRO addressing mode when the constant can be
+ ; represented using a single movz.
+ ;
+ ; cst = 000000000000f000
+ ; cst & ffffffffff000fff == 0
+ ; cst & ffffffffffff0fff == 0
+ ;
+ ; CHECK-LABEL: name: use_xro_preferred_mov
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 61440
+ ; CHECK: %cst:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 61440
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: use_xro_negative_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Check that this works even if we have a negative immediate.
+ ;
+ ; CHECK-LABEL: name: use_xro_negative_imm
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: %cst:gpr64 = MOVi64imm -61440
+ ; CHECK: %load:gpr64 = LDRXroX %copy, %cst, 0, 0 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 -61440
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: dont_use_xro_selectable_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Immediates that can be encoded in a LDRXui should be skipped.
+ ;
+ ; CHECK-LABEL: name: dont_use_xro_selectable_imm
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: %load:gpr64 = LDRXui %copy, 2 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 16
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: dont_use_xro_selectable_negative_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Immediates that can be encoded in an LDURXi should be skipped.
+ ;
+ ; CHECK-LABEL: name: dont_use_xro_selectable_negative_imm
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: %load:gpr64 = LDURXi %copy, -16 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 -16
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: dont_use_xro_zero
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Immediates that can be encoded in a LDRXui should be skipped.
+ ;
+ ; CHECK-LABEL: name: dont_use_xro_zero
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: %load:gpr64 = LDRXui %copy, 0 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 0
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: dont_use_xro_in_range
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Check that we skip constants which can be encoded in an add.
+ ; 17 is in [0x0, 0xfff]
+ ;
+ ; CHECK-LABEL: name: dont_use_xro_in_range
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64sp = COPY $x0
+ ; CHECK: %load:gpr64 = LDURXi %copy, 17 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 17
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
+
+...
+---
+name: dont_use_xro_add_lsl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; Check that we skip constants which can be encoded in an add with lsl #12
+ ; but cannot be materialized with a single movz.
+ ;
+ ; cst = 0x0000000000111000
+ ; cst & ffffffffff000fff == 0
+ ; cst & ffffffffff00ffff != 0
+ ; cst & ffffffffffff0fff != 0
+ ;
+ ; CHECK-LABEL: name: dont_use_xro_add_lsl
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %copy
+ ; CHECK: %addr:gpr64sp = ADDXri [[COPY]], 273, 12
+ ; CHECK: %load:gpr64 = LDRXui %addr, 0 :: (volatile load 8)
+ ; CHECK: RET_ReallyLR
+ %copy:gpr(p0) = COPY $x0
+ %cst:gpr(s64) = G_CONSTANT i64 1118208
+ %addr:gpr(p0) = G_PTR_ADD %copy, %cst(s64)
+ %load:gpr(s64) = G_LOAD %addr(p0) :: (volatile load 8)
+ RET_ReallyLR
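To replay the new test locally, the RUN line at the top expands (with lit's %s substitution written out as the in-tree path) to roughly:
```
llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select \
    -verify-machineinstrs \
    llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir -o - \
  | FileCheck llvm/test/CodeGen/AArch64/GlobalISel/xro-addressing-mode-constant.mir
```
As the NOTE in the file says, the CHECK lines themselves were generated with utils/update_mir_test_checks.py.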