[llvm] r318800 - [X86] Allow vpclmulqdq instructions to be commuted during isel to allow load folding.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 13:05:21 PST 2017


Author: ctopper
Date: Tue Nov 21 13:05:21 2017
New Revision: 318800

URL: http://llvm.org/viewvc/llvm-project?rev=318800&view=rev
Log:
[X86] Allow vpclmulqdq instructions to be commuted during isel to allow load folding.

The commuting patterns for the AVX version actually still had priority over the new patterns.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/commute-clmul.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=318800&r1=318799&r2=318800&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Nov 21 13:05:21 2017
@@ -7242,24 +7242,37 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, M
 // PCLMUL Instructions
 //===----------------------------------------------------------------------===//
 
+// Immediate transform to help with commuting.
+def PCLMULCommuteImm : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue();
+  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
+}]>;
+
 // SSE carry-less Multiplication instructions
-let Constraints = "$src1 = $dst", Predicates = [NoAVX, HasPCLMUL] in {
-  let isCommutable = 1 in
-  def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
-            (ins VR128:$src1, VR128:$src2, u8imm:$src3),
-            "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-            [(set VR128:$dst,
-              (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
-            IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;
-
-  def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
-            (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
-            "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-            [(set VR128:$dst,
-               (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
-                imm:$src3))],
-            IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>;
-}
+let Predicates = [NoAVX, HasPCLMUL] in {
+  let Constraints = "$src1 = $dst" in {
+    let isCommutable = 1 in
+    def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+              (ins VR128:$src1, VR128:$src2, u8imm:$src3),
+              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+              [(set VR128:$dst,
+                (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
+              IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>;
+
+    def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+              (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
+              "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+              [(set VR128:$dst,
+                 (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2),
+                  imm:$src3))],
+              IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMulLd, ReadAfterLd]>;
+  } // Constraints = "$src1 = $dst"
+
+  def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1,
+                                (i8 imm:$src3)),
+            (PCLMULQDQrm VR128:$src1, addr:$src2,
+                          (PCLMULCommuteImm imm:$src3))>;
+} // Predicates = [NoAVX, HasPCLMUL]
 
 // SSE aliases
 foreach HI = ["hq","lq"] in
@@ -7289,6 +7302,12 @@ multiclass vpclmulqdq<RegisterClass RC,
             [(set RC:$dst,
                (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
             Sched<[WriteCLMulLd, ReadAfterLd]>;
+
+  // We can commute a load in the first operand by swapping the sources and
+  // rotating the immediate.
+  def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
+            (!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
+                                           (PCLMULCommuteImm imm:$src3))>;
 }
 
 let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
@@ -7321,28 +7340,6 @@ multiclass vpclmulqdq_aliases<string Ins
 defm : vpclmulqdq_aliases<"VPCLMULQDQ", VR128, i128mem>;
 defm : vpclmulqdq_aliases<"VPCLMULQDQY", VR256, i256mem>;
 
-// Immediate transform to help with commuting.
-def PCLMULCommuteImm : SDNodeXForm<imm, [{
-  uint8_t Imm = N->getZExtValue();
-  return getI8Imm((uint8_t)((Imm >> 4) | (Imm << 4)), SDLoc(N));
-}]>;
-
-// We can commute a load in the first operand by swapping the sources and
-// rotating the immediate.
-let Predicates = [HasAVX, HasPCLMUL] in {
-  def : Pat<(int_x86_pclmulqdq (loadv2i64 addr:$src2), VR128:$src1,
-                                (i8 imm:$src3)),
-            (VPCLMULQDQrm VR128:$src1, addr:$src2,
-                          (PCLMULCommuteImm imm:$src3))>;
-}
-
-let Predicates = [NoAVX, HasPCLMUL] in {
-  def : Pat<(int_x86_pclmulqdq (loadv2i64 addr:$src2), VR128:$src1,
-                                (i8 imm:$src3)),
-            (PCLMULQDQrm VR128:$src1, addr:$src2,
-                          (PCLMULCommuteImm imm:$src3))>;
-}
-
 //===----------------------------------------------------------------------===//
 // SSE4A Instructions
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/X86/commute-clmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/commute-clmul.ll?rev=318800&r1=318799&r2=318800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/commute-clmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/commute-clmul.ll Tue Nov 21 13:05:21 2017
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+pclmul | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+pclmul | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse2,+pclmul | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx2,+pclmul | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+avx512vl,+vpclmulqdq | FileCheck %s --check-prefix=AVX
 
 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
 




More information about the llvm-commits mailing list