[llvm] r264845 - [x86] Fix a horrible bug in our lowering of x86 floating point atomic

Wed Mar 30 01:41:59 PDT 2016

Author: chandlerc
Date: Wed Mar 30 03:41:59 2016
New Revision: 264845

URL: http://llvm.org/viewvc/llvm-project?rev=264845&view=rev
Log:
[x86] Fix a horrible bug in our lowering of x86 floating point atomic
operations.

Specifically, we had code that tried to badly approximate reconstructing
all of the possible variations on addressing modes in two x86
instructions based on those in one pseudo instruction. This is not the
first bug uncovered with doing this, so stop doing it altogether.
Instead generically and pedantically copy every operand from the address
over to both new instructions, and strip kill flags from any register
operands.

This fixes a subtle bug seen in the wild where we would mysteriously
drop parts of the addressing mode, causing for example the index
argument in the added test case to just be completely ignored.

Hypothetically, this was an extremely bad miscompile because it actually
caused a predictable and leverageable write of a 64-bit quantity to an
unintended offset (the first element of the array instead of whatever
other element was intended). As a consequence, in theory this could even
have introduced security vulnerabilities.

However, this was only something that could happen with an atomic
floating point add. No other operation could trigger this bug, so it
seems extremely unlikely to have occurred widely in the wild.

But it did in fact occur, and frequently in scientific applications
which were using relaxed atomic updates of a floating point value after
adding a delta. Those would end up being quite badly miscompiled by
LLVM, which is how we found this. Of course, this often looks like
a race condition in the code, but it was actually a miscompile.

I suspect that this whole RELEASE_FADD thing was a complete mistake.
There is no such operation, and I worry that anything other than add
will get remarkably worse code generation. But that's not for this
change....

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/atomic_mi.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=264845&r1=264844&r2=264845&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Mar 30 03:41:59 2016
@@ -22943,34 +22943,37 @@ X86TargetLowering::EmitLoweredAtomicFP(M
   unsigned MOp, FOp;
   switch (MI->getOpcode()) {
   default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
-  case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
-  case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
+  case X86::RELEASE_FADD32mr:
+    FOp = X86::ADDSSrm;
+    MOp = X86::MOVSSmr;
+    break;
+  case X86::RELEASE_FADD64mr:
+    FOp = X86::ADDSDrm;
+    MOp = X86::MOVSDmr;
+    break;
   }
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-  MachineOperand MSrc = MI->getOperand(0);
-  unsigned VSrc = MI->getOperand(5).getReg();
-  const MachineOperand &Disp = MI->getOperand(3);
-  MachineOperand ZeroDisp = MachineOperand::CreateImm(0);
-  bool hasDisp = Disp.isGlobal() || Disp.isImm();
-  if (hasDisp && MSrc.isReg())
-    MSrc.setIsKill(false);
-  MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp))
-                                .addOperand(/*Base=*/MSrc)
-                                .addImm(/*Scale=*/1)
-                                .addReg(/*Index=*/0)
-                                .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
-                                .addReg(0);
-  MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp),
-                              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
-                          .addReg(VSrc)
-                          .addOperand(/*Base=*/MSrc)
-                          .addImm(/*Scale=*/1)
-                          .addReg(/*Index=*/0)
-                          .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
-                          .addReg(/*Segment=*/0);
-  MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill);
+  unsigned ValOpIdx = X86::AddrNumOperands;
+  unsigned VSrc = MI->getOperand(ValOpIdx).getReg();
+  MachineInstrBuilder MIB =
+      BuildMI(*BB, MI, DL, TII->get(FOp),
+              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
+          .addReg(VSrc);
+  for (int i = 0; i < X86::AddrNumOperands; ++i) {
+    MachineOperand &Operand = MI->getOperand(i);
+    // Clear any kill flags on register operands as we'll create a second
+    // instruction using the same address operands.
+    if (Operand.isReg())
+      Operand.setIsKill(false);
+    MIB.addOperand(Operand);
+  }
+  MachineInstr *FOpMI = MIB;
+  MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
+  for (int i = 0; i < X86::AddrNumOperands; ++i)
+    MIB.addOperand(MI->getOperand(i));
+  MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
   MI->eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
 }

Modified: llvm/trunk/test/CodeGen/X86/atomic_mi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic_mi.ll?rev=264845&r1=264844&r2=264845&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic_mi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic_mi.ll Wed Mar 30 03:41:59 2016
@@ -979,3 +979,20 @@ define void @fadd_64stack() {
   store atomic i64 %bc1, i64* %ptr release, align 8
   ret void
 }
+
+define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) {
+; X64-LABEL: fadd_array:
+; X64-NOT: lock
+; X64: addsd ([[ADDR:%r..,%r..,8]]), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: movsd %[[XMM]], ([[ADDR]])
+; X32-LABEL: fadd_array:
+; Don't check x86-32 (see comment above).
+bb:
+  %tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2
+  %tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8
+  %tmp7 = bitcast i64 %tmp6 to double
+  %tmp8 = fadd double %tmp7, %arg1
+  %tmp9 = bitcast double %tmp8 to i64
+  store atomic i64 %tmp9, i64* %tmp4 monotonic, align 8
+  ret void
+}