[llvm] 50efbf9 - [GlobalISel] Narrow binops feeding into G_AND with a mask
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 13 18:39:48 PDT 2021
Author: Jessica Paquette
Date: 2021-08-13T18:31:13-07:00
New Revision: 50efbf9cbeed9d202e9f66a0d154489811918944
URL: https://github.com/llvm/llvm-project/commit/50efbf9cbeed9d202e9f66a0d154489811918944
DIFF: https://github.com/llvm/llvm-project/commit/50efbf9cbeed9d202e9f66a0d154489811918944.diff
LOG: [GlobalISel] Narrow binops feeding into G_AND with a mask
This is a fairly common pattern:
```
%mask = G_CONSTANT iN <mask val>
%add = G_ADD %lhs, %rhs
%and = G_AND %add, %mask
```
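For context, here is the kind of source that typically produces this pattern (an illustrative example, not taken from this patch or the godbolt link below; the function name is made up):
```
// Masking a 64-bit sum down to its low 32 bits. Clang emits an i64 add
// feeding an and with 0xffffffff, which GlobalISel imports as the
// G_ADD + G_AND sequence above.
unsigned long long add_low32(unsigned long long a, unsigned long long b) {
  return (a + b) & 0xffffffffULL;
}
```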
We already have combines that eliminate a G_AND with a mask that does nothing.
If we rewrote the above into this:
```
%mask = G_CONSTANT iN <mask val>
%narrow_lhs = G_TRUNC %lhs
%narrow_rhs = G_TRUNC %rhs
%narrow_add = G_ADD %narrow_lhs, %narrow_rhs
%ext = G_ZEXT %narrow_add
%and = G_AND %ext, %mask
```
We'd then be able to take advantage of those combines via the trunc + zext.
For this to work (or to be beneficial in the best case):
- The operation we want to narrow then widen must only be used by the G_AND
- The G_TRUNC + G_ZEXT must be free
- Performing the operation at the narrower width must not produce a different
  value than performing it at the original width *after masking* (see the
  sketch below).
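Here is a minimal standalone sketch of that last requirement for an add masked to 16 bits (illustrative only, not code from this patch; the helper name is made up):
```
#include <cassert>
#include <cstdint>

// The low bits of a wide add depend only on the low bits of its operands,
// so masking after a 64-bit add gives the same result as zero-extending a
// 16-bit add of the truncated operands and then masking.
bool narrowAddMatchesWideAdd(uint64_t X, uint64_t Y) {
  uint64_t Wide = (X + Y) & 0xFFFFull;
  uint64_t Narrow = uint64_t(uint16_t(uint16_t(X) + uint16_t(Y))) & 0xFFFFull;
  return Wide == Narrow;
}

int main() {
  assert(narrowAddMatchesWideAdd(0xDEADBEEFCAFEF00Dull, 0x12345FFFFull));
  assert(narrowAddMatchesWideAdd(~0ull, 1));
  return 0;
}
```
The same reasoning holds for sub, mul, and the bitwise ops, which is why the matcher only accepts G_ADD, G_SUB, G_MUL, G_AND, G_OR, and G_XOR; operations like right shifts or divisions can move high bits into the low bits, so they would not satisfy this check.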
Example comparison between SDAG + GISel: https://godbolt.org/z/63jzb1Yvj
At -Os for AArch64, this is a 0.2% code size improvement on CTMark/pairlocalign.
Differential Revision: https://reviews.llvm.org/D107929
Added:
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 555be8be8885b..8855631859fcf 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -554,6 +554,11 @@ class CombinerHelper {
/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
+ /// \returns true if it is possible to narrow the width of a scalar binop
+ /// feeding a G_AND instruction \p MI.
+ bool matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d0328b1405afe..07de68fa1bf21 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,6 +30,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -2509,8 +2510,11 @@ class TargetLoweringBase {
return false;
}
- virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
- return false;
+ virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
+ virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
+ LLVMContext &Ctx) const {
+ return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
+ getApproximateEVTForLLT(ToTy, DL, Ctx));
}
virtual bool isProfitableToHoist(Instruction *I) const { return true; }
@@ -2586,8 +2590,11 @@ class TargetLoweringBase {
return false;
}
- virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
- return false;
+ virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
+ virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
+ LLVMContext &Ctx) const {
+ return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
+ getApproximateEVTForLLT(ToTy, DL, Ctx));
}
/// Return true if sign-extension from FromTy to ToTy is cheaper than
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 13ebc43697c2f..e65073a1d28d0 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -197,6 +197,12 @@ def reduce_shl_of_extend : GICombineRule<
[{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>;
+def narrow_binop_feeding_and : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchNarrowBinopFeedingAnd(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// [us]itofp(undef) = 0, because the result value is bounded.
def undef_to_fp_zero : GICombineRule<
(defs root:$root),
@@ -698,7 +704,8 @@ def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
zext_trunc_fold, icmp_to_true_false_known_bits]>;
-def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
+def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
+ narrow_binop_feeding_and]>;
def phi_combines : GICombineGroup<[extend_through_phis]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 23ff22fe3aa66..732b7ed5dd9d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4344,6 +4344,97 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Look for a binop feeding into an AND with a mask:
+ //
+ // %add = G_ADD %lhs, %rhs
+ // %and = G_AND %add, 000...11111111
+ //
+ // Check if it's possible to perform the binop at a narrower width and zext
+ // back to the original width like so:
+ //
+ // %narrow_lhs = G_TRUNC %lhs
+ // %narrow_rhs = G_TRUNC %rhs
+ // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
+ // %new_add = G_ZEXT %narrow_add
+ // %and = G_AND %new_add, 000...11111111
+ //
+ // This can allow later combines to eliminate the G_AND if it turns out
+ // that the mask is irrelevant.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ Register AndLHS = MI.getOperand(1).getReg();
+ Register AndRHS = MI.getOperand(2).getReg();
+ LLT WideTy = MRI.getType(Dst);
+
+ // If the potential binop has more than one use, then it's possible that one
+ // of those uses will need its full width.
+ if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
+ return false;
+
+ // Check if the LHS feeding the AND is impacted by the high bits that we're
+ // masking out.
+ //
+ // e.g. for 64-bit x, y:
+ //
+ // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
+ MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
+ if (!LHSInst)
+ return false;
+ unsigned LHSOpc = LHSInst->getOpcode();
+ switch (LHSOpc) {
+ default:
+ return false;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ break;
+ }
+
+ // Find the mask on the RHS.
+ auto Cst = getConstantVRegValWithLookThrough(AndRHS, MRI);
+ if (!Cst)
+ return false;
+ auto Mask = Cst->Value;
+ if (!Mask.isMask())
+ return false;
+
+ // No point in combining if there's nothing to truncate.
+ unsigned NarrowWidth = Mask.countTrailingOnes();
+ if (NarrowWidth == WideTy.getSizeInBits())
+ return false;
+ LLT NarrowTy = LLT::scalar(NarrowWidth);
+
+ // Check if adding the zext + truncates could be harmful.
+ auto &MF = *MI.getMF();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
+ !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
+ return false;
+ Register BinOpLHS = LHSInst->getOperand(1).getReg();
+ Register BinOpRHS = LHSInst->getOperand(2).getReg();
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
+ auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
+ auto NarrowBinOp =
+ Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
+ auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Ext.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
new file mode 100644
index 0000000000000..fb19cda303d36
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir
@@ -0,0 +1,332 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: add_64_mask_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: add_64_mask_32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ADD]](s32)
+ ; CHECK: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: sub_64_mask_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: sub_64_mask_32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SUB]](s32)
+ ; CHECK: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_SUB %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: mul_64_mask_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: mul_64_mask_32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[MUL]](s32)
+ ; CHECK: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_MUL %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: and_64_mask_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: and_64_mask_32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
+ ; CHECK: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_AND %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: or_64_mask_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: or_64_mask_32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ ; CHECK: %binop:_(s64) = G_SUB %binop_lhs, %binop_rhs
+ ; CHECK: %and:_(s64) = G_OR %binop, %mask_32
+ ; CHECK: $x0 = COPY %and(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_SUB %binop_lhs, %binop_rhs
+ %and:_(s64) = G_OR %binop, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: xor_64_mask_32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: xor_64_mask_32
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32)
+ ; CHECK: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_XOR %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: walk_thru_copy
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: walk_thru_copy
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[TRUNC1]]
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ADD]](s32)
+ ; CHECK: $x0 = COPY [[ZEXT]](s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %copy:_(s64) = COPY %binop
+ %and:_(s64) = G_AND %copy, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: dont_combine_zext_not_free_add_64_mask_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: dont_combine_zext_not_free_add_64_mask_16
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: %mask_16:_(s64) = G_CONSTANT i64 65535
+ ; CHECK: %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ ; CHECK: %and:_(s64) = G_AND %binop, %mask_16
+ ; CHECK: $x0 = COPY %and(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_16:_(s64) = G_CONSTANT i64 65535
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_16
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: dont_combine_zext_not_free_add_64_mask_8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: dont_combine_zext_not_free_add_64_mask_8
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: %mask_8:_(s64) = G_CONSTANT i64 255
+ ; CHECK: %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ ; CHECK: %and:_(s64) = G_AND %binop, %mask_8
+ ; CHECK: $x0 = COPY %and(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_8:_(s64) = G_CONSTANT i64 255
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_8
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: dont_combine_not_a_mask
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: dont_combine_not_a_mask
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: %not_a_mask:_(s64) = G_CONSTANT i64 26
+ ; CHECK: %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ ; CHECK: %and:_(s64) = G_AND %binop, %not_a_mask
+ ; CHECK: $x0 = COPY %and(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %not_a_mask:_(s64) = G_CONSTANT i64 26
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %not_a_mask
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: dont_combine_more_than_one_use
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: dont_combine_more_than_one_use
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: %not_a_mask:_(s64) = G_CONSTANT i64 26
+ ; CHECK: %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ ; CHECK: %and:_(s64) = G_AND %binop, %not_a_mask
+ ; CHECK: %or:_(s64) = G_OR %and, %binop
+ ; CHECK: $x0 = COPY %or(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %not_a_mask:_(s64) = G_CONSTANT i64 26
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %not_a_mask
+ %or:_(s64) = G_OR %and, %binop
+ $x0 = COPY %or(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: dont_combine_vector
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: dont_combine_vector
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK: %binop_lhs:_(<2 x s64>) = COPY $q0
+ ; CHECK: %binop_rhs:_(<2 x s64>) = COPY $q1
+ ; CHECK: %mask_elt:_(s64) = G_CONSTANT i64 4294967295
+ ; CHECK: %mask:_(<2 x s64>) = G_BUILD_VECTOR %mask_elt(s64), %mask_elt(s64)
+ ; CHECK: %binop:_(<2 x s64>) = G_ADD %binop_lhs, %binop_rhs
+ ; CHECK: %and:_(<2 x s64>) = G_AND %binop, %mask
+ ; CHECK: $q0 = COPY %and(<2 x s64>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %binop_lhs:_(<2 x s64>) = COPY $q0
+ %binop_rhs:_(<2 x s64>) = COPY $q1
+ %mask_elt:_(s64) = G_CONSTANT i64 4294967295
+ %mask:_(<2 x s64>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+ %binop:_(<2 x s64>) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(<2 x s64>) = G_AND %binop, %mask
+ $q0 = COPY %and(<2 x s64>)
+ RET_ReallyLR implicit $q0
+...
+---
+name: dont_combine_add_64_mask_64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: dont_combine_add_64_mask_64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %binop_lhs:_(s64) = COPY $x0
+ ; CHECK: %binop_rhs:_(s64) = COPY $x1
+ ; CHECK: %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ ; CHECK: $x0 = COPY %binop(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %binop_lhs:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_64:_(s64) = G_CONSTANT i64 18446744073709551615
+ %binop:_(s64) = G_ADD %binop_lhs, %binop_rhs
+ %and:_(s64) = G_AND %binop, %mask_64
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: dont_combine_copy_from_physreg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: dont_combine_copy_from_physreg
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: %copy_from_physreg:_(s64) = COPY $x0
+ ; CHECK: %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ ; CHECK: %and:_(s64) = G_AND %copy_from_physreg, %mask_32
+ ; CHECK: $x0 = COPY %and(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %copy_from_physreg:_(s64) = COPY $x0
+ %binop_rhs:_(s64) = COPY $x1
+ %mask_32:_(s64) = G_CONSTANT i64 4294967295
+ %copy:_(s64) = COPY %copy_from_physreg
+ %and:_(s64) = G_AND %copy, %mask_32
+ $x0 = COPY %and(s64)
+ RET_ReallyLR implicit $x0