[llvm] r337160 - [x86/SLH] Fix a bug where we would try to post-load harden non-GPRs.

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 16 04:38:48 PDT 2018


Author: chandlerc
Date: Mon Jul 16 04:38:48 2018
New Revision: 337160

URL: http://llvm.org/viewvc/llvm-project?rev=337160&view=rev
Log:
[x86/SLH] Fix a bug where we would try to post-load harden non-GPRs.

Found cases that hit the assert I added. This patch factors the validity
checking into a helper routine, calls it when deciding whether to harden
a load post-load, and asserts it when actually doing so later.

I've added tests for the various ways of loading a floating-point type,
as well as loads of each of the 128-bit vector types. Even though many of
these lower to identical instructions, it seems good to test them somewhat
comprehensively.

I'm confident there will be more fixes needed here; I'll try to add
tests each time I adjust this predicate.
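
For readers skimming the diff below, here is a minimal standalone sketch,
in plain C++ rather than the LLVM code itself, of the shape the new
canHardenPostLoad predicate takes: the loaded value has to land in a
general-purpose register of at most 8 bytes, and the matching GPR class is
found by indexing with log2 of the byte width. The RegClass enum and helper
names are hypothetical stand-ins, not the types the patch uses.

#include <cassert>
#include <iostream>

// Hypothetical stand-in for the register classes involved; VR128 stands in
// for the 128-bit vector classes that post-load hardening cannot handle.
enum class RegClass { GR8, GR16, GR32, GR64, VR128 };

// Integer log2 for the small power-of-two widths we care about (1, 2, 4, 8).
static unsigned log2Bytes(unsigned Bytes) {
  unsigned L = 0;
  while ((1u << L) < Bytes)
    ++L;
  return L;
}

// Mirrors the structure of canHardenPostLoad: reject anything wider than a
// GPR, then require the defined register class to be the GPR class of the
// matching width.
static bool canHardenPostLoadSketch(RegClass DefRC, unsigned DefRegBytes) {
  if (DefRegBytes > 8)
    return false; // Vectors (and anything wider than 8 bytes) are out.
  const RegClass GPRClasses[] = {RegClass::GR8, RegClass::GR16,
                                 RegClass::GR32, RegClass::GR64};
  return DefRC == GPRClasses[log2Bytes(DefRegBytes)];
}

int main() {
  // A 4-byte load into a 32-bit GPR can be hardened with an OR32rr.
  assert(canHardenPostLoadSketch(RegClass::GR32, 4));
  // A 16-byte vector load into an XMM register cannot; the pass falls back
  // to hardening the address registers instead, as the vector tests below show.
  assert(!canHardenPostLoadSketch(RegClass::VR128, 16));
  std::cout << "sketch checks passed\n";
}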

Modified:
    llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp
    llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll

Modified: llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp?rev=337160&r1=337159&r2=337160&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86SpeculativeLoadHardening.cpp Mon Jul 16 04:38:48 2018
@@ -173,6 +173,7 @@ private:
   MachineInstr *
   sinkPostLoadHardenedInst(MachineInstr &MI,
                            SmallPtrSetImpl<MachineInstr *> &HardenedLoads);
+  bool canHardenPostLoad(MachineInstr &MI);
   void hardenPostLoad(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkReturnInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
   void checkCallInstr(MachineInstr &MI, MachineSSAUpdater &PredStateSSA);
@@ -1115,12 +1116,12 @@ void X86SpeculativeLoadHardeningPass::ch
           (IndexReg && LoadDepRegs.test(IndexReg)))
         continue;
 
-      // If post-load hardening is enabled, this load is known to be
-      // data-invariant, and we aren't already going to harden one of the
+      // If post-load hardening is enabled, this load is compatible with
+      // post-load hardening, and we aren't already going to harden one of the
       // address registers, queue it up to be hardened post-load. Notably, even
       // once hardened this won't introduce a useful dependency that could prune
       // out subsequent loads.
-      if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
+      if (EnablePostLoadHardening && canHardenPostLoad(MI) &&
           !HardenedAddrRegs.count(BaseReg) &&
           !HardenedAddrRegs.count(IndexReg)) {
         HardenPostLoad.insert(&MI);
@@ -1602,6 +1603,25 @@ MachineInstr *X86SpeculativeLoadHardenin
   return MI;
 }
 
+bool X86SpeculativeLoadHardeningPass::canHardenPostLoad(MachineInstr &MI) {
+  if (!isDataInvariantLoad(MI))
+    return false;
+
+  auto &DefOp = MI.getOperand(0);
+  unsigned OldDefReg = DefOp.getReg();
+
+  auto *DefRC = MRI->getRegClass(OldDefReg);
+  int DefRegBytes = TRI->getRegSizeInBits(*DefRC) / 8;
+  if (DefRegBytes > 8)
+    // We don't support post-load hardening of vectors.
+    return false;
+
+  const TargetRegisterClass *GPRRegClasses[] = {
+      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
+      &X86::GR64RegClass};
+  return DefRC->hasSuperClassEq(GPRRegClasses[Log2_32(DefRegBytes)]);
+}
+
 // We can harden non-leaking loads into register without touching the address
 // by just hiding all of the loaded bits. We use an `or` instruction to do
 // this because having the poison value be all ones allows us to use the same
@@ -1609,8 +1629,8 @@ MachineInstr *X86SpeculativeLoadHardenin
 // execution and coercing them to one is sufficient.
 void X86SpeculativeLoadHardeningPass::hardenPostLoad(
     MachineInstr &MI, MachineSSAUpdater &PredStateSSA) {
-  assert(isDataInvariantLoad(MI) &&
-         "Cannot get here with a non-invariant load!");
+  assert(canHardenPostLoad(MI) &&
+         "Invalid instruction for post-load hardening!");
 
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc Loc = MI.getDebugLoc();
@@ -1625,14 +1645,6 @@ void X86SpeculativeLoadHardeningPass::ha
   unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
   unsigned OrOpCode = OrOpCodes[Log2_32(DefRegBytes)];
 
-#ifndef NDEBUG
-  const TargetRegisterClass *OrRegClasses[] = {
-      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
-      &X86::GR64RegClass};
-  assert(DefRC->hasSuperClassEq(OrRegClasses[Log2_32(DefRegBytes)]) &&
-         "Cannot define this register with OR instruction!");
-#endif
-
   unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
 
   auto GetStateRegInRC = [&](const TargetRegisterClass &RC) {
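
As an aside on the OR-based scheme the comment above hardenPostLoad
describes: the predicate state is all zeros on the architecturally taken
path and all ones under misspeculation, so OR-ing it into the freshly
loaded value either leaves the value intact or coerces it to all ones.
A minimal standalone sketch with illustrative values only, not the pass
itself:

#include <cstdint>
#include <iostream>

// OR the predicate state into the loaded value, as the pass does with
// OR8rr/OR16rr/OR32rr/OR64rr depending on the width of the destination.
static uint64_t hardenLoadedValue(uint64_t Loaded, uint64_t PredState) {
  return Loaded | PredState;
}

int main() {
  const uint64_t Loaded = 0x123456789abcdef0ULL; // illustrative value only

  // Correct speculation: the predicate state is zero and the value is
  // unchanged, so hardening is essentially free on the hot path.
  std::cout << std::hex << hardenLoadedValue(Loaded, 0) << "\n";

  // Misspeculation: the predicate state is all ones, so the loaded bits are
  // hidden and cannot feed a secret-dependent address downstream.
  std::cout << std::hex << hardenLoadedValue(Loaded, ~0ULL) << "\n";
}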

Modified: llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll?rev=337160&r1=337159&r2=337160&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll (original)
+++ llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll Mon Jul 16 04:38:48 2018
@@ -590,3 +590,275 @@ lpad:
   call void @sink(i32 %leak)
   unreachable
 }
+
+declare void @sink_float(float)
+declare void @sink_double(double)
+
+; Test direct and converting loads of floating point values.
+define void @test_fp_loads(float* %fptr, double* %dptr, i32* %i32ptr, i64* %i64ptr) nounwind {
+; X64-LABEL: test_fp_loads:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq %rcx, %r15
+; X64-NEXT:    movq %rdx, %r14
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    movq $-1, %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r12
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    cvtss2sd %xmm0, %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r14
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2ssl (%r14), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r15
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2sdq (%r15), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2ssq (%r15), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_float
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    cvtsi2sdl (%r14), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_double
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    retq
+;
+; X64-LFENCE-LABEL: test_fp_loads:
+; X64-LFENCE:       # %bb.0: # %entry
+; X64-LFENCE-NEXT:    pushq %r15
+; X64-LFENCE-NEXT:    pushq %r14
+; X64-LFENCE-NEXT:    pushq %r12
+; X64-LFENCE-NEXT:    pushq %rbx
+; X64-LFENCE-NEXT:    pushq %rax
+; X64-LFENCE-NEXT:    movq %rcx, %r15
+; X64-LFENCE-NEXT:    movq %rdx, %r14
+; X64-LFENCE-NEXT:    movq %rsi, %rbx
+; X64-LFENCE-NEXT:    movq %rdi, %r12
+; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-LFENCE-NEXT:    cvtsd2ss %xmm0, %xmm0
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-LFENCE-NEXT:    cvtss2sd %xmm0, %xmm0
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2ssl (%r14), %xmm0
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2sdq (%r15), %xmm0
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2ssq (%r15), %xmm0
+; X64-LFENCE-NEXT:    callq sink_float
+; X64-LFENCE-NEXT:    xorps %xmm0, %xmm0
+; X64-LFENCE-NEXT:    cvtsi2sdl (%r14), %xmm0
+; X64-LFENCE-NEXT:    callq sink_double
+; X64-LFENCE-NEXT:    addq $8, %rsp
+; X64-LFENCE-NEXT:    popq %rbx
+; X64-LFENCE-NEXT:    popq %r12
+; X64-LFENCE-NEXT:    popq %r14
+; X64-LFENCE-NEXT:    popq %r15
+; X64-LFENCE-NEXT:    retq
+entry:
+  %f1 = load float, float* %fptr
+  call void @sink_float(float %f1)
+  %d1 = load double, double* %dptr
+  call void @sink_double(double %d1)
+  %f2.d = load double, double* %dptr
+  %f2 = fptrunc double %f2.d to float
+  call void @sink_float(float %f2)
+  %d2.f = load float, float* %fptr
+  %d2 = fpext float %d2.f to double
+  call void @sink_double(double %d2)
+  %f3.i = load i32, i32* %i32ptr
+  %f3 = sitofp i32 %f3.i to float
+  call void @sink_float(float %f3)
+  %d3.i = load i64, i64* %i64ptr
+  %d3 = sitofp i64 %d3.i to double
+  call void @sink_double(double %d3)
+  %f4.i = load i64, i64* %i64ptr
+  %f4 = sitofp i64 %f4.i to float
+  call void @sink_float(float %f4)
+  %d4.i = load i32, i32* %i32ptr
+  %d4 = sitofp i32 %d4.i to double
+  call void @sink_double(double %d4)
+  ret void
+}
+
+declare void @sink_v4f32(<4 x float>)
+declare void @sink_v2f64(<2 x double>)
+declare void @sink_v16i8(<16 x i8>)
+declare void @sink_v8i16(<8 x i16>)
+declare void @sink_v4i32(<4 x i32>)
+declare void @sink_v2i64(<2 x i64>)
+
+; Test loads of vectors.
+define void @test_vec_loads(<4 x float>* %v4f32ptr, <2 x double>* %v2f64ptr, <16 x i8>* %v16i8ptr, <8 x i16>* %v8i16ptr, <4 x i32>* %v4i32ptr, <2 x i64>* %v2i64ptr) nounwind {
+; X64-LABEL: test_vec_loads:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movq %r9, %r14
+; X64-NEXT:    movq %r8, %r15
+; X64-NEXT:    movq %rcx, %r12
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rsi, %rbx
+; X64-NEXT:    movq $-1, %rcx
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %rdi
+; X64-NEXT:    movaps (%rdi), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v4f32
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %rbx
+; X64-NEXT:    movaps (%rbx), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v2f64
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r13
+; X64-NEXT:    movaps (%r13), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v16i8
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r12
+; X64-NEXT:    movaps (%r12), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v8i16
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r15
+; X64-NEXT:    movaps (%r15), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v4i32
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    orq %rax, %r14
+; X64-NEXT:    movaps (%r14), %xmm0
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    callq sink_v2i64
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    sarq $63, %rax
+; X64-NEXT:    shlq $47, %rax
+; X64-NEXT:    orq %rax, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    retq
+;
+; X64-LFENCE-LABEL: test_vec_loads:
+; X64-LFENCE:       # %bb.0: # %entry
+; X64-LFENCE-NEXT:    pushq %r15
+; X64-LFENCE-NEXT:    pushq %r14
+; X64-LFENCE-NEXT:    pushq %r13
+; X64-LFENCE-NEXT:    pushq %r12
+; X64-LFENCE-NEXT:    pushq %rbx
+; X64-LFENCE-NEXT:    movq %r9, %r14
+; X64-LFENCE-NEXT:    movq %r8, %r15
+; X64-LFENCE-NEXT:    movq %rcx, %r12
+; X64-LFENCE-NEXT:    movq %rdx, %r13
+; X64-LFENCE-NEXT:    movq %rsi, %rbx
+; X64-LFENCE-NEXT:    movaps (%rdi), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v4f32
+; X64-LFENCE-NEXT:    movaps (%rbx), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v2f64
+; X64-LFENCE-NEXT:    movaps (%r13), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v16i8
+; X64-LFENCE-NEXT:    movaps (%r12), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v8i16
+; X64-LFENCE-NEXT:    movaps (%r15), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v4i32
+; X64-LFENCE-NEXT:    movaps (%r14), %xmm0
+; X64-LFENCE-NEXT:    callq sink_v2i64
+; X64-LFENCE-NEXT:    popq %rbx
+; X64-LFENCE-NEXT:    popq %r12
+; X64-LFENCE-NEXT:    popq %r13
+; X64-LFENCE-NEXT:    popq %r14
+; X64-LFENCE-NEXT:    popq %r15
+; X64-LFENCE-NEXT:    retq
+entry:
+  %x1 = load <4 x float>, <4 x float>* %v4f32ptr
+  call void @sink_v4f32(<4 x float> %x1)
+  %x2 = load <2 x double>, <2 x double>* %v2f64ptr
+  call void @sink_v2f64(<2 x double> %x2)
+  %x3 = load <16 x i8>, <16 x i8>* %v16i8ptr
+  call void @sink_v16i8(<16 x i8> %x3)
+  %x4 = load <8 x i16>, <8 x i16>* %v8i16ptr
+  call void @sink_v8i16(<8 x i16> %x4)
+  %x5 = load <4 x i32>, <4 x i32>* %v4i32ptr
+  call void @sink_v4i32(<4 x i32> %x5)
+  %x6 = load <2 x i64>, <2 x i64>* %v2i64ptr
+  call void @sink_v2i64(<2 x i64> %x6)
+  ret void
+}



