[llvm] a7aaafd - [AArch64][GlobalISel] Implement custom legalization for s32/s64 G_FCOPYSIGN
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 28 16:03:51 PDT 2022
Author: Jessica Paquette
Date: 2022-09-28T16:03:22-07:00
New Revision: a7aaafde2ef5cdcb8a930a6be1bc638721b515ff
URL: https://github.com/llvm/llvm-project/commit/a7aaafde2ef5cdcb8a930a6be1bc638721b515ff
DIFF: https://github.com/llvm/llvm-project/commit/a7aaafde2ef5cdcb8a930a6be1bc638721b515ff.diff
LOG: [AArch64][GlobalISel] Implement custom legalization for s32/s64 G_FCOPYSIGN
This is intended to be equivalent to the s32 + s64 cases in
AArch64TargetLowering::LowerFCOPYSIGN.
Widen everything and then use G_BIT + a mask to handle the actual copysign
operation. Then, narrow back down to s32/s64.
I wasn't sure what the best/most canonical INSERT_SUBREG-selectable
pattern is. I chose G_INSERT_VECTOR_ELT + an undef vector because it produces
reasonably okay codegen. (It doesn't produce INSERT_SUBREG right now though.)
If there's a better way to do this then I'm happy to change it.
We also have a couple of codegen deficiencies with how we emit vector constants
right now. (We need a GISel equivalent to the tryAdvSIMDModImm64 stuff.)
Differential Revision: https://reviews.llvm.org/D108725
Added:
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1cb61589eee97..1a1feb772c870 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -814,6 +814,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
.legalFor({{s64, s32}, {s64, s64}});
+ // TODO: Custom legalization for vector types.
+ // TODO: Custom legalization for mismatched types.
+ // TODO: s16 support.
+ getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -856,6 +861,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_MEMMOVE:
case TargetOpcode::G_MEMSET:
return legalizeMemOps(MI, Helper);
+ case TargetOpcode::G_FCOPYSIGN:
+ return legalizeFCopySign(MI, Helper);
}
llvm_unreachable("expected switch to return");
@@ -1438,3 +1445,63 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
return false;
}
+
+bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
+ LegalizerHelper &Helper) const { // Custom-legalize a scalar s32/s64 G_FCOPYSIGN (MI) via a 128-bit vector G_BIT.
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ assert(DstTy.isScalar() && "Only expected scalars right now!");
+ const unsigned DstSize = DstTy.getSizeInBits();
+ assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
+ assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy && // operand 2 must have the same type as the result
+ "Expected homogeneous types!");
+
+ // We want to materialize a mask with only the high (sign) bit of each element set.
+ uint64_t EltMask; // per-element value handed to buildConstant for the mask
+ LLT VecTy; // 128-bit vector type whose elements are DstTy
+
+ // TODO: s16 support.
+ switch (DstSize) {
+ default:
+ llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
+ case 64: {
+ // AdvSIMD immediate moves cannot materialize our mask in a single
+ // instruction for 64-bit elements. Instead, materialize zero and then
+ // negate it with G_FNEG below (-0.0 has only the sign bit set).
+ EltMask = 0;
+ VecTy = LLT::fixed_vector(2, DstTy);
+ break;
+ }
+ case 32:
+ EltMask = 0x80000000ULL; // bit 31: the high bit of a 32-bit element
+ VecTy = LLT::fixed_vector(4, DstTy);
+ break;
+ }
+
+ // Widen In1 and In2 to 128 bits by inserting each into element 0 of an undef
+ // vector. We want these to eventually become INSERT_SUBREGs.
+ auto Undef = MIRBuilder.buildUndef(VecTy);
+ auto Zero = MIRBuilder.buildConstant(DstTy, 0); // element index 0, typed as DstTy
+ auto Ins1 = MIRBuilder.buildInsertVectorElement(
+ VecTy, Undef, MI.getOperand(1).getReg(), Zero);
+ auto Ins2 = MIRBuilder.buildInsertVectorElement(
+ VecTy, Undef, MI.getOperand(2).getReg(), Zero);
+
+ // Construct the mask. For 64-bit elements the zero constant is turned into the sign mask by G_FNEG.
+ auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
+ if (DstSize == 64)
+ Mask = MIRBuilder.buildFNeg(VecTy, Mask);
+ // NOTE(review): G_BIT presumably takes the Mask-selected (sign) bits from Ins2 and all other bits from Ins1 -- confirm against the G_BIT definition.
+ auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
+
+ // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination; the remaining
+ // lanes go to fresh vregs that are not used here. We want this to eventually become an EXTRACT_SUBREG.
+ SmallVector<Register, 2> DstRegs(1, Dst); // NOTE(review): inline capacity is 2, but the s32 path stores 4 registers.
+ for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
+ DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
+ MIRBuilder.buildUnmerge(DstRegs, Sel);
+ MI.eraseFromParent(); // the original G_FCOPYSIGN is fully replaced by the sequence above
+ return true;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 973f96ff47755..c10f6e071ed43 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -57,6 +57,7 @@ class AArch64LegalizerInfo : public LegalizerInfo {
LegalizerHelper &Helper) const;
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
+ bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
new file mode 100644
index 0000000000000..912daad7d60b0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: legalize_s32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $s0, $s1
+ ; CHECK-LABEL: name: legalize_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK: %val:_(s32) = COPY $s0
+ ; CHECK: %sign:_(s32) = COPY $s1
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
+ ; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+ ; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
+ ; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
+ ; CHECK: $s0 = COPY %fcopysign(s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %val:_(s32) = COPY $s0
+ %sign:_(s32) = COPY $s1
+ %fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
+ $s0 = COPY %fcopysign(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: legalize_s64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: legalize_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK: %val:_(s64) = COPY $d0
+ ; CHECK: %sign:_(s64) = COPY $d1
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
+ ; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+ ; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
+ ; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
+ ; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
+ ; CHECK: $d0 = COPY %fcopysign(s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %val:_(s64) = COPY $d0
+ %sign:_(s64) = COPY $d1
+ %fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)
+ $d0 = COPY %fcopysign(s64)
+ RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 725baf0f620f4..4be1cf2e0ab3d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -487,8 +487,8 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
More information about the llvm-commits
mailing list