[llvm] r313455 - [X86] Add isel patterns to be able to fold loads into VPERM2F128 even when the load is on the first input to the SDNode.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 16 02:16:48 PDT 2017


Author: ctopper
Date: Sat Sep 16 02:16:48 2017
New Revision: 313455

URL: http://llvm.org/viewvc/llvm-project?rev=313455&view=rev
Log:
[X86] Add isel patterns to be able to fold loads into VPERM2F128 even when the load is on the first input to the SDNode.

We just need to toggle bits 1 and 5 of the immediate and swap the sources. The peephole pass could trigger commuting/folding for this later, but it's easy enough to fix in isel.

Disable the peephole pass on the main vperm2x128 test so we know we're doing this through isel.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=313455&r1=313454&r2=313455&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Sep 16 02:16:48 2017
@@ -7670,12 +7670,28 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcM
           Sched<[WriteFShuffleLd, ReadAfterLd]>;
 }
 
+// Immediate transform to help with commuting.
+def Perm2XCommuteImm : SDNodeXForm<imm, [{
+  return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
+}]>;
+
+let Predicates = [HasAVX] in {
+// Pattern with load in other operand.
+def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
+                                VR256:$src1, (i8 imm:$imm))),
+          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
+}
+
 let Predicates = [HasAVX1Only] in {
 def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
           (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
 def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
                   (loadv4i64 addr:$src2), (i8 imm:$imm))),
           (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+// Pattern with load in other operand.
+def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
+                                VR256:$src1, (i8 imm:$imm))),
+          (VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -8083,6 +8099,11 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrc
                              (i8 imm:$src3)))]>,
           Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
 
+let Predicates = [HasAVX2] in
+def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
+                                VR256:$src1, (i8 imm:$imm))),
+          (VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
+
 
 //===----------------------------------------------------------------------===//
 // VINSERTI128 - Insert packed integer values

Modified: llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll?rev=313455&r1=313454&r2=313455&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vperm2x128.ll Sat Sep 16 02:16:48 2017
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
 
 define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
 ; ALL-LABEL: shuffle_v8f32_45670123:




More information about the llvm-commits mailing list