[llvm] 8269fd2 - [GlobalISel][X86] Add initial scalar G_MUL/G_SMULH/G_UMULH instruction selection handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 2 04:56:48 PDT 2023
Author: Simon Pilgrim
Date: 2023-07-02T12:56:41+01:00
New Revision: 8269fd2db50bf97ae3b6e928365d157314e491c8
URL: https://github.com/llvm/llvm-project/commit/8269fd2db50bf97ae3b6e928365d157314e491c8
DIFF: https://github.com/llvm/llvm-project/commit/8269fd2db50bf97ae3b6e928365d157314e491c8.diff
LOG: [GlobalISel][X86] Add initial scalar G_MUL/G_SMULH/G_UMULH instruction selection handling
Reuse the existing div/rem selection code to also handle mul/imul to support G_MUL/G_SMULH/G_UMULH, as they have a similar pattern using rDX/rAX for mulh/mul results, plus the AH/AL support for i8 multiplies.
Added:
Modified:
llvm/lib/Target/X86/X86InstructionSelector.cpp
llvm/lib/Target/X86/X86LegalizerInfo.cpp
llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index c962c5204b04fc..fc6f3bd876acd7 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -114,8 +114,8 @@ class X86InstructionSelector : public InstructionSelector {
bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
@@ -421,11 +421,14 @@ bool X86InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_IMPLICIT_DEF:
case TargetOpcode::G_PHI:
return selectImplicitDefOrPHI(I, MRI);
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_UMULH:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM:
- return selectDivRem(I, MRI, MF);
+ return selectMulDivRem(I, MRI, MF);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWSideEffects(I, MRI, MF);
}
@@ -1558,11 +1561,14 @@ bool X86InstructionSelector::selectImplicitDefOrPHI(
return true;
}
-bool X86InstructionSelector::selectDivRem(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
- // The implementation of this function is taken from X86FastISel.
- assert((I.getOpcode() == TargetOpcode::G_SDIV ||
+bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ // The implementation of this function is adapted from X86FastISel.
+ assert((I.getOpcode() == TargetOpcode::G_MUL ||
+ I.getOpcode() == TargetOpcode::G_SMULH ||
+ I.getOpcode() == TargetOpcode::G_UMULH ||
+ I.getOpcode() == TargetOpcode::G_SDIV ||
I.getOpcode() == TargetOpcode::G_SREM ||
I.getOpcode() == TargetOpcode::G_UDIV ||
I.getOpcode() == TargetOpcode::G_UREM) &&
@@ -1581,10 +1587,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
return false;
const static unsigned NumTypes = 4; // i8, i16, i32, i64
- const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static unsigned NumOps = 7; // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulh
const static bool S = true; // IsSigned
const static bool U = false; // !IsSigned
const static unsigned Copy = TargetOpcode::COPY;
+
// For the X86 IDIV instruction, in most cases the dividend
// (numerator) must be in a specific register pair highreg:lowreg,
// producing the quotient in lowreg and the remainder in highreg.
@@ -1593,19 +1600,19 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
// exception is i8, where the dividend is defined as a single register rather
// than a register pair, and we therefore directly sign-extend the dividend
// into lowreg, instead of copying, and ignore the highreg.
- const static struct DivRemEntry {
+ const static struct MulDivRemEntry {
// The following portion depends only on the data type.
unsigned SizeInBits;
unsigned LowInReg; // low part of the register pair
unsigned HighInReg; // high part of the register pair
// The following portion depends on both the data type and the operation.
- struct DivRemResult {
- unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ struct MulDivRemResult {
+ unsigned OpMulDivRem; // The specific MUL/DIV opcode to use.
unsigned OpSignExtend; // Opcode for sign-extending lowreg into
// highreg, or copying a zero into highreg.
unsigned OpCopy; // Opcode for copying dividend into lowreg, or
// zero/sign-extending into lowreg for i8.
- unsigned DivRemResultReg; // Register containing the desired result.
+ unsigned ResultReg; // Register containing the desired result.
bool IsOpSigned; // Whether to use signed or unsigned form.
} ResultTable[NumOps];
} OpTable[NumTypes] = {
@@ -1617,25 +1624,34 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
{X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
{X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv
{X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem
+ {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
+ {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
+ {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U}, // UMulH
}}, // i8
{16,
X86::AX,
X86::DX,
{
- {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
- {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
- {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
- {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
- }}, // i16
+ {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
+ {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
+ {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
+ {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
+ {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U}, // UMulH
+ }}, // i16
{32,
X86::EAX,
X86::EDX,
{
- {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
- {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
- {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
- {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
- }}, // i32
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
+ {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
+ {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
+ {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U}, // UMulH
+ }}, // i32
{64,
X86::RAX,
X86::RDX,
@@ -1644,10 +1660,13 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
{X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
{X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
{X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
- }}, // i64
+ {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
+ {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
+ {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U}, // UMulH
+ }}, // i64
};
- auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const DivRemEntry &El) {
+ auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
return El.SizeInBits == RegTy.getSizeInBits();
});
if (OpEntryIt == std::end(OpTable))
@@ -1656,7 +1675,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
unsigned OpIndex;
switch (I.getOpcode()) {
default:
- llvm_unreachable("Unexpected div/rem opcode");
+ llvm_unreachable("Unexpected mul/div/rem opcode");
case TargetOpcode::G_SDIV:
OpIndex = 0;
break;
@@ -1669,10 +1688,20 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
case TargetOpcode::G_UREM:
OpIndex = 3;
break;
+ case TargetOpcode::G_MUL:
+ OpIndex = 4;
+ break;
+ case TargetOpcode::G_SMULH:
+ OpIndex = 5;
+ break;
+ case TargetOpcode::G_UMULH:
+ OpIndex = 6;
+ break;
}
- const DivRemEntry &TypeEntry = *OpEntryIt;
- const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+ const MulDivRemEntry &TypeEntry = *OpEntryIt;
+ const MulDivRemEntry::MulDivRemResult &OpEntry =
+ TypeEntry.ResultTable[OpIndex];
const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
@@ -1687,6 +1716,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
TypeEntry.LowInReg)
.addReg(Op1Reg);
+
// Zero-extend or sign-extend into high-order input register.
if (OpEntry.OpSignExtend) {
if (OpEntry.IsOpSigned)
@@ -1717,9 +1747,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
}
}
}
- // Generate the DIV/IDIV instruction.
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
+
+ // Generate the DIV/IDIV/MUL/IMUL instruction.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
.addReg(Op2Reg);
+
// For i8 remainder, we can't reference ah directly, as we'll end
// up with bogus copies like %r9b = COPY %ah. Reference ax
// instead to prevent ah references in a rex instruction.
@@ -1728,7 +1760,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
// won't generate explicit references to the GR8_NOREX registers. If
// the allocator and/or the backend get enhanced to be more robust in
// that regard, this can be, and should be, removed.
- if (OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
+ if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
@@ -1750,9 +1782,10 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
} else {
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
DstReg)
- .addReg(OpEntry.DivRemResultReg);
+ .addReg(OpEntry.ResultReg);
}
I.eraseFromParent();
+
return true;
}
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 49a0f69413fc3c..a4a247f85f3d72 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -196,6 +196,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.clampScalar(0, s8, sMaxScalar)
.scalarize(0);
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typeInSet(0, {s8, s16, s32})(Query) ||
+ (Is64Bit && typeInSet(0, {s64})(Query));
+ })
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .scalarize(0);
+
// integer divisions
getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
.legalIf([=](const LegalityQuery &Query) -> bool {
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
index d064d6c9c73878..deff2ba439a470 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
@@ -1,10 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X64
-# RUN: llc -O0 -mtriple=i386-linux-gnu -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s 2>%t -o - | FileCheck %s --check-prefixes=CHECK,X86
-# RUN: FileCheck -check-prefix=ERR32 %s < %t
-
-# ERR32: remark: <unknown>:0:0: unable to legalize instruction: %14:_(s32) = G_UMULH %7:_, %9:_ (in function: test_mul_i42)
-# ERR32: remark: <unknown>:0:0: unable to legalize instruction: %10:_(s32) = G_UMULH %3:_, %5:_ (in function: test_mul_i64)
+# RUN: llc -O0 -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X86
--- |
define void @test_mul_i1() { ret void }
@@ -200,11 +196,8 @@ body: |
; X86: liveins: $rdi, $rsi
; X86-NEXT: {{ $}}
; X86-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx
- ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s42) = G_TRUNC [[COPY]](s64)
- ; X86-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s42)
- ; X86-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s42)
- ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
- ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT1]](s64)
+ ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; X86-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
; X86-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
; X86-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
@@ -212,9 +205,7 @@ body: |
; X86-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
; X86-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
- ; X86-NEXT: [[TRUNC1:%[0-9]+]]:_(s42) = G_TRUNC [[MV]](s64)
- ; X86-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC1]](s42)
- ; X86-NEXT: $rax = COPY [[ANYEXT2]](s64)
+ ; X86-NEXT: $rax = COPY [[MV]](s64)
; X86-NEXT: RET 0
%0(s64) = COPY $rdx
%1(s42) = G_TRUNC %0(s64)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
index ba0499e903501d..3835debf9b03ed 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/mul-scalar.ll
@@ -1,40 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X86
-;TODO: instruction selection not supported yet
-;define i8 @test_mul_i8(i8 %arg1, i8 %arg2) {
-; %ret = mul i8 %arg1, %arg2
-; ret i8 %ret
-;}
+define i8 @test_mul_i8(i8 %arg1, i8 %arg2) nounwind {
+; X64-LABEL: test_mul_i8:
+; X64: # %bb.0:
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: imulb %sil
+; X64-NEXT: retq
+;
+; X86-LABEL: test_mul_i8:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cbtw
+; X86-NEXT: imulb %cl
+; X86-NEXT: retl
+ %ret = mul i8 %arg1, %arg2
+ ret i8 %ret
+}
-define i16 @test_mul_i16(i16 %arg1, i16 %arg2) {
+define i16 @test_mul_i16(i16 %arg1, i16 %arg2) nounwind {
; X64-LABEL: test_mul_i16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: imulw %di, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; X86-LABEL: test_mul_i16:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imulw %cx, %ax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
%ret = mul i16 %arg1, %arg2
ret i16 %ret
}
-define i32 @test_mul_i32(i32 %arg1, i32 %arg2) {
+define i32 @test_mul_i32(i32 %arg1, i32 %arg2) nounwind {
; X64-LABEL: test_mul_i32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: imull %edi, %eax
; X64-NEXT: retq
+;
+; X86-LABEL: test_mul_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
%ret = mul i32 %arg1, %arg2
ret i32 %ret
}
-define i64 @test_mul_i64(i64 %arg1, i64 %arg2) {
+define i64 @test_mul_i64(i64 %arg1, i64 %arg2) nounwind {
; X64-LABEL: test_mul_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: imulq %rdi, %rax
; X64-NEXT: retq
+;
+; X86-LABEL: test_mul_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: imull %eax, %esi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: imull %edx, %edi
+; X86-NEXT: mull %edx
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
%ret = mul i64 %arg1, %arg2
ret i64 %ret
}
+;TODO: instruction selection not supported yet
+;define i128 @test_mul_i128(i128 %arg1, i128 %arg2) nounwind {
+; %ret = mul i128 %arg1, %arg2
+; ret i128 %ret
+;}
More information about the llvm-commits
mailing list