[llvm] r356121 - [X86] Add patterns for rotr by immediate to fix PR41057.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 00:07:27 PDT 2019


Author: ctopper
Date: Thu Mar 14 00:07:26 2019
New Revision: 356121

URL: http://llvm.org/viewvc/llvm-project?rev=356121&view=rev
Log:
[X86] Add patterns for rotr by immediate to fix PR41057.

Prior to the introduction of the funnel shift intrinsics, we could count on
rotates by immediate preferring rotl, since that's what MatchRotate checks
first. The or+shift pattern doesn't have an inherent direction, so one must
be chosen arbitrarily.
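
For context, that legacy form is a shift+or idiom like the following minimal
IR sketch (not part of this commit; the constant 7 is only illustrative). It
reads equally well as a rotl by 7 or a rotr by 25:

  define i32 @rotate_via_or(i32 %x) nounwind {
    %hi = shl i32 %x, 7
    %lo = lshr i32 %x, 25
    %r  = or i32 %hi, %lo
    ret i32 %r
  }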

With funnel shifts there is a direction: fshr will try to use rotr first,
while fshl will try to use rotl first.

This patch adds isel patterns for rotr to complement the rotl patterns. I've
put the rotr by 1 patterns in the instruction patterns and moved the rotl by
bitwidth-1 patterns to separate Pat patterns.

Fixes PR41057.
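
For reference, the rotate-right shapes exercised by the updated tests look
like this minimal IR sketch (mirroring rot32.ll; function names are
illustrative). With both inputs equal, fshr is a rotate right: the first case
now selects ror by immediate (rorx/shrd on the relevant subtargets), and the
second, a rotate right by bitwidth-1, uses the opposite rotate-by-1
instruction:

  declare i32 @llvm.fshr.i32(i32, i32, i32)

  ; Rotate right by 7 -> rorl $7 (or rorxl $7 / shrdl $7).
  define i32 @ror_by_7(i32 %x) nounwind {
    %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 7)
    ret i32 %r
  }

  ; Rotate right by 31 == rotate left by 1 -> roll on plain x86.
  define i32 @ror_by_31(i32 %x) nounwind {
    %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 31)
    ret i32 %r
  }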

Modified:
    llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td
    llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll
    llvm/trunk/test/CodeGen/X86/rot32.ll
    llvm/trunk/test/CodeGen/X86/rot64.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td?rev=356121&r1=356120&r2=356121&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrShiftRotate.td Thu Mar 14 00:07:26 2019
@@ -585,16 +585,16 @@ def ROR64ri  : RIi8<0xC1, MRM1r, (outs G
 // Rotate by 1
 def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
                  "ror{b}\t$dst",
-                 [(set GR8:$dst, (rotl GR8:$src1, (i8 7)))]>;
+                 [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
 def ROR16r1  : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
                  "ror{w}\t$dst",
-                 [(set GR16:$dst, (rotl GR16:$src1, (i8 15)))]>, OpSize16;
+                 [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize16;
 def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
                  "ror{l}\t$dst",
-                 [(set GR32:$dst, (rotl GR32:$src1, (i8 31)))]>, OpSize32;
+                 [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>, OpSize32;
 def ROR64r1  : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
                   "ror{q}\t$dst",
-                  [(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
+                  [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
 } // Constraints = "$src = $dst", SchedRW
 
 let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
@@ -633,18 +633,18 @@ def ROR64mi  : RIi8<0xC1, MRM1m, (outs),
 // Rotate by 1
 def ROR8m1   : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
                  "ror{b}\t$dst",
-                 [(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst)]>;
+                 [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
 def ROR16m1  : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
                  "ror{w}\t$dst",
-                 [(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst)]>,
+                 [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
                  OpSize16;
 def ROR32m1  : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
                  "ror{l}\t$dst",
-                 [(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst)]>,
+                 [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
                  OpSize32;
 def ROR64m1  : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
                  "ror{q}\t$dst",
-                 [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)]>,
+                 [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
                  Requires<[In64BitMode]>;
 } // SchedRW
 
@@ -806,6 +806,34 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
 
 } // Defs = [EFLAGS]
 
+// Use the opposite rotate if it allows us to use the rotate by 1 instruction.
+def : Pat<(rotl GR8:$src1,  (i8 7)),  (ROR8r1  GR8:$src1)>;
+def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
+def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
+def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
+def : Pat<(rotr GR8:$src1,  (i8 7)),  (ROL8r1  GR8:$src1)>;
+def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
+def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
+def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+
+def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst),
+          (ROR8m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst),
+          (ROR16m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst),
+          (ROR32m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst),
+          (ROR64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(store (rotr (loadi8 addr:$dst), (i8 7)), addr:$dst),
+          (ROL8m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi16 addr:$dst), (i8 15)), addr:$dst),
+          (ROL16m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
+          (ROL32m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
+          (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
 // Sandy Bridge and newer Intel processors support faster rotates using
 // SHLD to avoid a partial flag update on the normal rotate instructions.
 let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
@@ -813,6 +841,11 @@ let Predicates = [HasFastSHLDRotate], Ad
             (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
   def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
             (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+
+  def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+            (SHRD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
+  def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+            (SHRD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
 }
 
 def ROT32L2R_imm8  : SDNodeXForm<imm, [{
@@ -870,19 +903,29 @@ let Predicates = [HasBMI2] in {
 
   // Prefer RORX which is non-destructive and doesn't update EFLAGS.
   let AddedComplexity = 10 in {
+    def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+              (RORX32ri GR32:$src, imm:$shamt)>;
+    def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+              (RORX64ri GR64:$src, imm:$shamt)>;
+
     def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
               (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
     def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
               (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
   }
 
+  def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+            (RORX32mi addr:$src, imm:$shamt)>;
+  def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+            (RORX64mi addr:$src, imm:$shamt)>;
+
   def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
             (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
   def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
             (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
 
   // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
-  // immedidate shift, i.e. the following code is considered better
+  // immediate shift, i.e. the following code is considered better
   //
   //  mov %edi, %esi
   //  shl $imm, %esi

Modified: llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll?rev=356121&r1=356120&r2=356121&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll (original)
+++ llvm/trunk/test/CodeGen/X86/funnel-shift-rot.ll Thu Mar 14 00:07:26 2019
@@ -222,13 +222,13 @@ define i8 @rotr_i8_const_shift7(i8 %x) n
 ; X32-SSE2-LABEL: rotr_i8_const_shift7:
 ; X32-SSE2:       # %bb.0:
 ; X32-SSE2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-SSE2-NEXT:    rorb $7, %al
+; X32-SSE2-NEXT:    rolb %al
 ; X32-SSE2-NEXT:    retl
 ;
 ; X64-AVX2-LABEL: rotr_i8_const_shift7:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    rorb $7, %al
+; X64-AVX2-NEXT:    rolb %al
 ; X64-AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-AVX2-NEXT:    retq
   %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 7)

Modified: llvm/trunk/test/CodeGen/X86/rot32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot32.ll?rev=356121&r1=356120&r2=356121&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot32.ll Thu Mar 14 00:07:26 2019
@@ -472,67 +472,157 @@ define i32 @fshl_load(i32* %p) nounwind
 }
 
 define i32 @fshr(i32 %x) nounwind {
-; CHECK32-LABEL: fshr:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    rorl $7, %eax
-; CHECK32-NEXT:    retl
-;
-; CHECK64-LABEL: fshr:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    rorl $7, %eax
-; CHECK64-NEXT:    retq
+; X86-LABEL: fshr:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    rorl $7, %eax
+; X86-NEXT:    retl
+;
+; SHLD-LABEL: fshr:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    shrdl $7, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxl $7, {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorl $7, %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl %edi, %eax
+; SHLD64-NEXT:    shrdl $7, %edi, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $7, %edi, %eax
+; BMI264-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 7)
   ret i32 %f
 }
 declare i32 @llvm.fshr.i32(i32, i32, i32)
 
 define i32 @fshr1(i32 %x) nounwind {
-; CHECK32-LABEL: fshr1:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    rorl $1, %eax
-; CHECK32-NEXT:    retl
-;
-; CHECK64-LABEL: fshr1:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    rorl $1, %eax
-; CHECK64-NEXT:    retq
+; X86-LABEL: fshr1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    rorl $1, %eax
+; X86-NEXT:    retl
+;
+; SHLD-LABEL: fshr1:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    shrdl $1, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr1:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxl $1, {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rorl $1, %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr1:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl %edi, %eax
+; SHLD64-NEXT:    shrdl $1, %edi, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr1:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $1, %edi, %eax
+; BMI264-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 1)
   ret i32 %f
 }
 
 define i32 @fshr31(i32 %x) nounwind {
-; CHECK32-LABEL: fshr31:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    rorl $31, %eax
-; CHECK32-NEXT:    retl
-;
-; CHECK64-LABEL: fshr31:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl %edi, %eax
-; CHECK64-NEXT:    rorl $31, %eax
-; CHECK64-NEXT:    retq
+; X86-LABEL: fshr31:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    roll %eax
+; X86-NEXT:    retl
+;
+; SHLD-LABEL: fshr31:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    shrdl $31, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr31:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxl $31, {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr31:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    roll %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr31:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl %edi, %eax
+; SHLD64-NEXT:    shrdl $31, %edi, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr31:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $31, %edi, %eax
+; BMI264-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 31)
   ret i32 %f
 }
 
 define i32 @fshr_load(i32* %p) nounwind {
-; CHECK32-LABEL: fshr_load:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT:    movl (%eax), %eax
-; CHECK32-NEXT:    rorl $7, %eax
-; CHECK32-NEXT:    retl
-;
-; CHECK64-LABEL: fshr_load:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    movl (%rdi), %eax
-; CHECK64-NEXT:    rorl $7, %eax
-; CHECK64-NEXT:    retq
+; X86-LABEL: fshr_load:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    rorl $7, %eax
+; X86-NEXT:    retl
+;
+; SHLD-LABEL: fshr_load:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SHLD-NEXT:    movl (%eax), %eax
+; SHLD-NEXT:    shrdl $7, %eax, %eax
+; SHLD-NEXT:    retl
+;
+; BMI2-LABEL: fshr_load:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BMI2-NEXT:    rorxl $7, (%eax), %eax
+; BMI2-NEXT:    retl
+;
+; X64-LABEL: fshr_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    rorl $7, %eax
+; X64-NEXT:    retq
+;
+; SHLD64-LABEL: fshr_load:
+; SHLD64:       # %bb.0:
+; SHLD64-NEXT:    movl (%rdi), %eax
+; SHLD64-NEXT:    shrdl $7, %eax, %eax
+; SHLD64-NEXT:    retq
+;
+; BMI264-LABEL: fshr_load:
+; BMI264:       # %bb.0:
+; BMI264-NEXT:    rorxl $7, (%rdi), %eax
+; BMI264-NEXT:    retq
   %x = load i32, i32* %p
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 7)
   ret i32 %f

Modified: llvm/trunk/test/CodeGen/X86/rot64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rot64.ll?rev=356121&r1=356120&r2=356121&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rot64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rot64.ll Thu Mar 14 00:07:26 2019
@@ -278,42 +278,86 @@ define i64 @fshl_load(i64* %p) nounwind
 }
 
 define i64 @fshr(i64 %x) nounwind {
-; ALL-LABEL: fshr:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    rorq $7, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rorq $7, %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shrdq $7, %rdi, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $7, %rdi, %rax
+; BMI2-NEXT:    retq
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 7)
   ret i64 %f
 }
 declare i64 @llvm.fshr.i64(i64, i64, i64)
 
 define i64 @fshr1(i64 %x) nounwind {
-; ALL-LABEL: fshr1:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    rorq $1, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr1:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rorq $1, %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr1:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shrdq $1, %rdi, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr1:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $1, %rdi, %rax
+; BMI2-NEXT:    retq
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 1)
   ret i64 %f
 }
 
 define i64 @fshr63(i64 %x) nounwind {
-; ALL-LABEL: fshr63:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq %rdi, %rax
-; ALL-NEXT:    rorq $63, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr63:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    rolq %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr63:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq %rdi, %rax
+; SHLD-NEXT:    shrdq $63, %rdi, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr63:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $63, %rdi, %rax
+; BMI2-NEXT:    retq
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 63)
   ret i64 %f
 }
 
 define i64 @fshr_load(i64* %p) nounwind {
-; ALL-LABEL: fshr_load:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movq (%rdi), %rax
-; ALL-NEXT:    rorq $7, %rax
-; ALL-NEXT:    retq
+; X64-LABEL: fshr_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    rorq $7, %rax
+; X64-NEXT:    retq
+;
+; SHLD-LABEL: fshr_load:
+; SHLD:       # %bb.0:
+; SHLD-NEXT:    movq (%rdi), %rax
+; SHLD-NEXT:    shrdq $7, %rax, %rax
+; SHLD-NEXT:    retq
+;
+; BMI2-LABEL: fshr_load:
+; BMI2:       # %bb.0:
+; BMI2-NEXT:    rorxq $7, (%rdi), %rax
+; BMI2-NEXT:    retq
   %x = load i64, i64* %p
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 7)
   ret i64 %f
