[llvm] r318800 - [X86] Allow vpclmulqdq instructions to be commuted during isel to allow load folding.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 21 13:05:21 PST 2017
Author: ctopper
Date: Tue Nov 21 13:05:21 2017
New Revision: 318800
URL: http://llvm.org/viewvc/llvm-project?rev=318800&view=rev
Log:
[X86] Allow vpclmulqdq instructions to be commuted during isel to allow load folding.
The commuting patterns for the AVX version actually still had priority over the new patterns.
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/commute-clmul.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=318800&r1=318799&r2=318800&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Nov 21 13:05:21 2017
@@ -7242,24 +7242,37 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, M
// PCLMUL Instructions
//===----------------------------------------------------------------------===//
+// Immediate transform to help with commuting.
+def PCLMULCommuteImm : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
+}]>;
+
// SSE carry-less Multiplication instructions
-let Constraints = "$src1 = $dst", Predicates = [NoAVX, HasPCLMUL] in {
- let isCommutable = 1 in
- def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
- IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;
-
- def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
- imm:$src3))],
- IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>;
-}
+let Predicates = [NoAVX, HasPCLMUL] in {
+ let Constraints = "$src1 = $dst" in {
+ let isCommutable = 1 in
+ def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;
+
+ def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
+ imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>;
+ } // Constraints = "$src1 = $dst"
+
+ def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
+ (i8 imm:$src3)),
+ (PCLMULQDQrm VR128:$src1, addr:$src2,
+ (PCLMULCommuteImm imm:$src3))>;
+} // Predicates = [NoAVX, HasPCLMUL]
// SSE aliases
foreach HI = ["hq","lq"] in
@@ -7289,6 +7302,12 @@ multiclass vpclmulqdq<RegisterClass RC,
[(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
Sched<[WriteCLMulLd, ReadAfterLd]>;
+
+ // We can commute a load in the first operand by swapping the sources and
+ // rotating the immediate.
+ def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
+ (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
+ (PCLMULCommuteImm imm:$src3))>;
}
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
@@ -7321,28 +7340,6 @@ multiclass vpclmulqdq_aliases<string Ins
defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;
-// Immediate transform to help with commuting.
-def PCLMULCommuteImm : SDNodeXForm<imm, [{
- uint8_t Imm = N->getZExtValue();
- return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
-}]>;
-
-// We can commute a load in the first operand by swapping the sources and
-// rotating the immediate.
-let Predicates = [HasAVX, HasPCLMUL] in {
- def : Pat<(int_x86_pclmulqdq (loadv2i64 addr:$src2), VR128:$src1,
- (i8 imm:$src3)),
- (VPCLMULQDQrm VR128:$src1, addr:$src2,
- (PCLMULCommuteImm imm:$src3))>;
-}
-
-let Predicates = [NoAVX, HasPCLMUL] in {
- def : Pat<(int_x86_pclmulqdq (loadv2i64 addr:$src2), VR128:$src1,
- (i8 imm:$src3)),
- (PCLMULQDQrm VR128:$src1, addr:$src2,
- (PCLMULCommuteImm imm:$src3))>;
-}
-
//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/commute-clmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-clmul.ll?rev=318800&r1=318799&r2=318800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-clmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-clmul.ll Tue Nov 21 13:05:21 2017
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+pclmul | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+pclmul | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse2,+pclmul | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx2,+pclmul | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx512vl,+vpclmulqdq | FileCheck %s --check-prefix=AVX
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
More information about the llvm-commits mailing list