[llvm] r337308 - [x86/SLH] Flesh out the data-invariant instruction table a bit based on feedback from Craig.

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 17 11:07:59 PDT 2018


Author: chandlerc
Date: Tue Jul 17 11:07:59 2018
New Revision: 337308

URL: http://llvm.org/viewvc/llvm-project?rev=337308&view=rev
Log:
[x86/SLH] Flesh out the data-invariant instruction table a bit based on feedback from Craig.

Summary:
The only thing he suggested that I've skipped here is the double-wide
multiply instructions. Multiply is an area where I'm nervous about there
being some hidden data-dependent behavior, and it doesn't seem important
for any benchmarks I have, so I'm skipping it and sticking with the
minimal multiply support that matches what I know is widely used in
existing crypto libraries. We can always add double-wide multiply when
we have clarity from vendors about its behavior and guarantees.

I've tried to at least cover the fundamentals here with tests, although
I've not tried to cover every width or permutation. I can add more tests
where folks think it would be helpful.

Reviewers: craig.topper

Subscribers: sanjoy, mcrosier, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D49413

Modified:
    llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp
    llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll

Modified: llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp?rev=337308&r1=337307&r2=337308&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp Tue Jul 17 11:07:59 2018
@@ -865,6 +865,27 @@ static bool isDataInvariant(MachineInstr
   case X86::BZHI32rr:
   case X86::BZHI64rr:
 
+  // Shift and rotate.
+  case X86::ROL8r1:  case X86::ROL16r1:  case X86::ROL32r1:  case X86::ROL64r1:
+  case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL:
+  case X86::ROL8ri:  case X86::ROL16ri:  case X86::ROL32ri:  case X86::ROL64ri:
+  case X86::ROR8r1:  case X86::ROR16r1:  case X86::ROR32r1:  case X86::ROR64r1:
+  case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL:
+  case X86::ROR8ri:  case X86::ROR16ri:  case X86::ROR32ri:  case X86::ROR64ri:
+  case X86::SAR8r1:  case X86::SAR16r1:  case X86::SAR32r1:  case X86::SAR64r1:
+  case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL:
+  case X86::SAR8ri:  case X86::SAR16ri:  case X86::SAR32ri:  case X86::SAR64ri:
+  case X86::SHL8r1:  case X86::SHL16r1:  case X86::SHL32r1:  case X86::SHL64r1:
+  case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL:
+  case X86::SHL8ri:  case X86::SHL16ri:  case X86::SHL32ri:  case X86::SHL64ri:
+  case X86::SHR8r1:  case X86::SHR16r1:  case X86::SHR32r1:  case X86::SHR64r1:
+  case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL:
+  case X86::SHR8ri:  case X86::SHR16ri:  case X86::SHR32ri:  case X86::SHR64ri:
+  case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL:
+  case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8:
+  case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL:
+  case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8:
+
   // Basic arithmetic is constant time on the input but does set flags.
   case X86::ADC8rr:   case X86::ADC8ri:
   case X86::ADC16rr:  case X86::ADC16ri:   case X86::ADC16ri8:
@@ -898,9 +919,10 @@ static bool isDataInvariant(MachineInstr
   case X86::ADCX32rr: case X86::ADCX64rr:
   case X86::ADOX32rr: case X86::ADOX64rr:
   case X86::ANDN32rr: case X86::ANDN64rr:
-  // Just one operand for inc and dec.
-  case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r:
+  // Unary arithmetic operations.
   case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r:
+  case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r:
+  case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
     // Check whether the EFLAGS implicit-def is dead. We assume that this will
     // always find the implicit-def because this code should only be reached
     // for instructions that do in fact implicitly def this.
@@ -915,11 +937,19 @@ static bool isDataInvariant(MachineInstr
     // don't set EFLAGS.
     LLVM_FALLTHROUGH;
 
-  // Integer multiply w/o affecting flags is still believed to be constant
-  // time on x86. Called out separately as this is among the most surprising
-  // instructions to exhibit that behavior.
-  case X86::MULX32rr:
-  case X86::MULX64rr:
+  // Unlike other arithmetic, NOT doesn't set EFLAGS.
+  case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r:
+
+  // Various move instructions used to zero or sign extend things. Note that we
+  // intentionally don't support the _NOREX variants as we can't handle that
+  // register constraint anyways.
+  case X86::MOVSX16rr8:
+  case X86::MOVSX32rr8: case X86::MOVSX32rr16:
+  case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32:
+  case X86::MOVZX16rr8:
+  case X86::MOVZX32rr8: case X86::MOVZX32rr16:
+  case X86::MOVZX64rr8: case X86::MOVZX64rr16:
+  case X86::MOV32rr:
 
   // Arithmetic instructions that are both constant time and don't set flags.
   case X86::RORX32ri:

Modified: llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll?rev=337308&r1=337307&r2=337308&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll (original)
+++ llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll Tue Jul 17 11:07:59 2018
@@ -69,8 +69,8 @@ define void @test_basic_conditions(i32 %
 ; X64-NEXT:    cmovneq %rbx, %rax
 ; X64-NEXT:    movl (%rcx), %ecx
 ; X64-NEXT:    addl (%r15), %ecx
-; X64-NEXT:    orl %eax, %ecx
 ; X64-NEXT:    movslq %ecx, %rdi
+; X64-NEXT:    orq %rax, %rdi
 ; X64-NEXT:    movl (%r15,%rdi,4), %esi
 ; X64-NEXT:    orl %eax, %esi
 ; X64-NEXT:    movq (%r9), %r14
@@ -516,8 +516,8 @@ define void @test_basic_eh(i32 %a, i32*
 ; X64-NEXT:    sarq $63, %rcx
 ; X64-NEXT:    movl (%rax), %eax
 ; X64-NEXT:    addl (%rbx), %eax
-; X64-NEXT:    orl %ecx, %eax
 ; X64-NEXT:    cltq
+; X64-NEXT:    orq %rcx, %rax
 ; X64-NEXT:    movl (%r14,%rax,4), %edi
 ; X64-NEXT:    orl %ecx, %edi
 ; X64-NEXT:    shlq $47, %rcx
@@ -892,6 +892,34 @@ define void @test_deferred_hardening(i32
 ; X64-NEXT:    callq sink
 ; X64-NEXT:    movq %rsp, %rax
 ; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movl (%rbx), %edi
+; X64-NEXT:    shll $7, %edi
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movzwl (%rbx), %ecx
+; X64-NEXT:    sarw $7, %cx
+; X64-NEXT:    movzwl %cx, %edi
+; X64-NEXT:    notl %edi
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movzwl (%rbx), %ecx
+; X64-NEXT:    rolw $9, %cx
+; X64-NEXT:    movswl %cx, %edi
+; X64-NEXT:    negl %edi
+; X64-NEXT:    orl %eax, %edi
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
 ; X64-NEXT:    addq $8, %rsp
@@ -914,6 +942,19 @@ define void @test_deferred_hardening(i32
 ; X64-LFENCE-NEXT:    movl (%r14), %ecx
 ; X64-LFENCE-NEXT:    leal 1(%rax,%rcx), %edi
 ; X64-LFENCE-NEXT:    callq sink
+; X64-LFENCE-NEXT:    movl (%rbx), %edi
+; X64-LFENCE-NEXT:    shll $7, %edi
+; X64-LFENCE-NEXT:    callq sink
+; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
+; X64-LFENCE-NEXT:    sarw $7, %ax
+; X64-LFENCE-NEXT:    movzwl %ax, %edi
+; X64-LFENCE-NEXT:    notl %edi
+; X64-LFENCE-NEXT:    callq sink
+; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
+; X64-LFENCE-NEXT:    rolw $9, %ax
+; X64-LFENCE-NEXT:    movswl %ax, %edi
+; X64-LFENCE-NEXT:    negl %edi
+; X64-LFENCE-NEXT:    callq sink
 ; X64-LFENCE-NEXT:    addq $8, %rsp
 ; X64-LFENCE-NEXT:    popq %rbx
 ; X64-LFENCE-NEXT:    popq %r14
@@ -928,5 +969,23 @@ entry:
   %b3 = load i32, i32* %ptr2
   %b4 = add i32 %b2, %b3
   call void @sink(i32 %b4)
+  %c1 = load i32, i32* %ptr1
+  %c2 = shl i32 %c1, 7
+  call void @sink(i32 %c2)
+  %d1 = load i32, i32* %ptr1
+  ; Check trunc and integer ops narrower than i32.
+  %d2 = trunc i32 %d1 to i16
+  %d3 = ashr i16 %d2, 7
+  %d4 = zext i16 %d3 to i32
+  %d5 = xor i32 %d4, -1
+  call void @sink(i32 %d5)
+  %e1 = load i32, i32* %ptr1
+  %e2 = trunc i32 %e1 to i16
+  %e3 = lshr i16 %e2, 7
+  %e4 = shl i16 %e2, 9
+  %e5 = or i16 %e3, %e4
+  %e6 = sext i16 %e5 to i32
+  %e7 = sub i32 0, %e6
+  call void @sink(i32 %e7)
   ret void
 }




More information about the llvm-commits mailing list