[llvm] r357096 - [X86] Add post-isel pseudos for rotate by immediate using SHLD/SHRD

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 10:29:34 PDT 2019


Author: ctopper
Date: Wed Mar 27 10:29:34 2019
New Revision: 357096

URL: http://llvm.org/viewvc/llvm-project?rev=357096&view=rev
Log:
[X86] Add post-isel pseudos for rotate by immediate using SHLD/SHRD

Haswell CPUs have special support for SHLD/SHRD with the same register for both sources. Such an instruction will go to the rotate/shift unit on port 0 or 6. This gives it 1 cycle latency and 0.5 cycle reciprocal throughput. When the register is not the same, it becomes a 3 cycle operation on port 1. Sandybridge and Ivybridge always have 1 cycle latency and 0.5 cycle reciprocal throughput for any SHLD.

When the FastSHLDRotate feature flag is set, we try to use SHLD for rotate by immediate unless BMI2 is enabled. But MachineCopyPropagation can look through a copy and change one of the sources to be different. This will break the hardware optimization.

This patch adds pseudo instructions to hide the second source input until after register allocation and MachineCopyPropagation. I'm not sure if this is the best way to do this or if there's some other way we can make this work.

Fixes PR41055

Differential Revision: https://reviews.llvm.org/D59391

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td
    llvm/trunk/test/CodeGen/X86/rot32.ll
    llvm/trunk/test/CodeGen/X86/rot64.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=357096&r1=357095&r2=357096&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Mar 27 10:29:34 2019
@@ -4083,6 +4083,20 @@ static bool expandNOVLXStore(MachineInst
 
   return true;
 }
+
+static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
+  MIB->setDesc(Desc);
+  int64_t ShiftAmt = MIB->getOperand(2).getImm();
+  // Temporarily remove the immediate so we can add another source register.
+  MIB->RemoveOperand(2);
+  // Add the register. Don't copy the kill flag if there is one.
+  MIB.addReg(MIB->getOperand(1).getReg(),
+             getUndefRegState(MIB->getOperand(1).isUndef()));
+  // Add back the immediate.
+  MIB.addImm(ShiftAmt);
+  return true;
+}
+
 bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   bool HasAVX = Subtarget.hasAVX();
   MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -4237,6 +4251,10 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
   case X86::XOR64_FP:
   case X86::XOR32_FP:
     return expandXorFP(MIB, *this);
+  case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
+  case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
+  case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
+  case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
   case X86::ADD8rr_DB:    MIB->setDesc(get(X86::OR8rr));    break;
   case X86::ADD16rr_DB:   MIB->setDesc(get(X86::OR16rr));   break;
   case X86::ADD32rr_DB:   MIB->setDesc(get(X86::OR32rr));   break;

Modified: llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td?rev=357096&r1=357095&r2=357096&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td Wed Mar 27 10:29:34 2019
@@ -838,16 +838,24 @@ def : Pat<(store (rotr (loadi64 addr:$ds
 
 // Sandy Bridge and newer Intel processors support faster rotates using
 // SHLD to avoid a partial flag update on the normal rotate instructions.
-let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
-  def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
-            (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
-  def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
-            (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+// Use a pseudo so that TwoAddressInstructionPass and register allocation will
+// see this as a unary instruction.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
+    Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteSHDrri],
+    Constraints = "$src1 = $dst" in {
+  def SHLDROT32ri  : I<0, Pseudo, (outs GR32:$dst),
+                       (ins GR32:$src1, u8imm:$shamt), "",
+                     [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$shamt)))]>;
+  def SHLDROT64ri  : I<0, Pseudo, (outs GR64:$dst),
+                       (ins GR64:$src1, u8imm:$shamt), "",
+                     [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$shamt)))]>;
 
-  def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
-            (SHRD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
-  def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
-            (SHRD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+  def SHRDROT32ri  : I<0, Pseudo, (outs GR32:$dst),
+                       (ins GR32:$src1, u8imm:$shamt), "",
+                     [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$shamt)))]>;
+  def SHRDROT64ri  : I<0, Pseudo, (outs GR64:$dst),
+                       (ins GR64:$src1, u8imm:$shamt), "",
+                     [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
 }
 
 def ROT32L2R_imm8  : SDNodeXForm<imm, [{

Modified: llvm/trunk/test/CodeGen/X86/rot32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot32.ll?rev=357096&r1=357095&r2=357096&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot32.ll Wed Mar 27 10:29:34 2019
@@ -127,7 +127,7 @@ define i32 @xfoo(i32 %x, i32 %y, i32 %z)
 ; SHLD64-LABEL: xfoo:
 ; SHLD64:       # %bb.0: # %entry
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shldl $7, %edi, %eax
+; SHLD64-NEXT:    shldl $7, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: xfoo:
@@ -233,7 +233,7 @@ define i32 @xun(i32 %x, i32 %y, i32 %z)
 ; SHLD64-LABEL: xun:
 ; SHLD64:       # %bb.0: # %entry
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shldl $25, %edi, %eax
+; SHLD64-NEXT:    shldl $25, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: xun:
@@ -341,7 +341,7 @@ define i32 @fshl(i32 %x) nounwind {
 ; SHLD64-LABEL: fshl:
 ; SHLD64:       # %bb.0:
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shldl $7, %edi, %eax
+; SHLD64-NEXT:    shldl $7, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: fshl:
@@ -380,7 +380,7 @@ define i32 @fshl1(i32 %x) nounwind {
 ; SHLD64-LABEL: fshl1:
 ; SHLD64:       # %bb.0:
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shldl $1, %edi, %eax
+; SHLD64-NEXT:    shldl $1, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: fshl1:
@@ -418,7 +418,7 @@ define i32 @fshl31(i32 %x) nounwind {
 ; SHLD64-LABEL: fshl31:
 ; SHLD64:       # %bb.0:
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shldl $31, %edi, %eax
+; SHLD64-NEXT:    shldl $31, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: fshl31:
@@ -498,7 +498,7 @@ define i32 @fshr(i32 %x) nounwind {
 ; SHLD64-LABEL: fshr:
 ; SHLD64:       # %bb.0:
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shrdl $7, %edi, %eax
+; SHLD64-NEXT:    shrdl $7, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: fshr:
@@ -537,7 +537,7 @@ define i32 @fshr1(i32 %x) nounwind {
 ; SHLD64-LABEL: fshr1:
 ; SHLD64:       # %bb.0:
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shrdl $1, %edi, %eax
+; SHLD64-NEXT:    shrdl $1, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: fshr1:
@@ -575,7 +575,7 @@ define i32 @fshr31(i32 %x) nounwind {
 ; SHLD64-LABEL: fshr31:
 ; SHLD64:       # %bb.0:
 ; SHLD64-NEXT:    movl %edi, %eax
-; SHLD64-NEXT:    shrdl $31, %edi, %eax
+; SHLD64-NEXT:    shrdl $31, %eax, %eax
 ; SHLD64-NEXT:    retq
 ;
 ; BMI264-LABEL: fshr31:

Modified: llvm/trunk/test/CodeGen/X86/rot64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot64.ll?rev=357096&r1=357095&r2=357096&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot64.ll Wed Mar 27 10:29:34 2019
@@ -77,7 +77,7 @@ define i64 @xfoo(i64 %x, i64 %y, i64 %z)
 ; SHLD-LABEL: xfoo:
 ; SHLD:       # %bb.0: # %entry
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shldq $7, %rdi, %rax
+; SHLD-NEXT:    shldq $7, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: xfoo:
@@ -139,7 +139,7 @@ define i64 @xun(i64 %x, i64 %y, i64 %z)
 ; SHLD-LABEL: xun:
 ; SHLD:       # %bb.0: # %entry
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shldq $57, %rdi, %rax
+; SHLD-NEXT:    shldq $57, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: xun:
@@ -201,7 +201,7 @@ define i64 @fshl(i64 %x) nounwind {
 ; SHLD-LABEL: fshl:
 ; SHLD:       # %bb.0:
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shldq $7, %rdi, %rax
+; SHLD-NEXT:    shldq $7, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: fshl:
@@ -223,7 +223,7 @@ define i64 @fshl1(i64 %x) nounwind {
 ; SHLD-LABEL: fshl1:
 ; SHLD:       # %bb.0:
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shldq $1, %rdi, %rax
+; SHLD-NEXT:    shldq $1, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: fshl1:
@@ -244,7 +244,7 @@ define i64 @fshl63(i64 %x) nounwind {
 ; SHLD-LABEL: fshl63:
 ; SHLD:       # %bb.0:
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shldq $63, %rdi, %rax
+; SHLD-NEXT:    shldq $63, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: fshl63:
@@ -287,7 +287,7 @@ define i64 @fshr(i64 %x) nounwind {
 ; SHLD-LABEL: fshr:
 ; SHLD:       # %bb.0:
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shrdq $7, %rdi, %rax
+; SHLD-NEXT:    shrdq $7, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: fshr:
@@ -309,7 +309,7 @@ define i64 @fshr1(i64 %x) nounwind {
 ; SHLD-LABEL: fshr1:
 ; SHLD:       # %bb.0:
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shrdq $1, %rdi, %rax
+; SHLD-NEXT:    shrdq $1, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: fshr1:
@@ -330,7 +330,7 @@ define i64 @fshr63(i64 %x) nounwind {
 ; SHLD-LABEL: fshr63:
 ; SHLD:       # %bb.0:
 ; SHLD-NEXT:    movq %rdi, %rax
-; SHLD-NEXT:    shrdq $63, %rdi, %rax
+; SHLD-NEXT:    shrdq $63, %rax, %rax
 ; SHLD-NEXT:    retq
 ;
 ; BMI2-LABEL: fshr63:




More information about the llvm-commits mailing list