[llvm] 36003c2 - [X86] Selecting fld0 for undefined value in fast ISEL.

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 25 17:43:41 PDT 2021


Author: Luo, Yuanke
Date: 2021-06-26T08:43:09+08:00
New Revision: 36003c20ada6abf636c112869e3405189cab903d

URL: https://github.com/llvm/llvm-project/commit/36003c20ada6abf636c112869e3405189cab903d
DIFF: https://github.com/llvm/llvm-project/commit/36003c20ada6abf636c112869e3405189cab903d.diff

LOG: [X86] Selecting fld0 for undefined value in fast ISEL.

When opt-bisect-limit is set on the command line to a value that is
less than the number of the ISel pass and CurBisectNum has expired, the
"DAG to DAG" pass lowers its opt level to O0. However, "processimpdefs"
and "X86 FP Stackifier" are not stopped when CurBisectNum expires, so an
undefined fp0 is generated. This causes a crash in the "X86 FP Stackifier"
pass, because the Stackifier doesn't expect any undefined fp value.

Here is the scenario that causes the compiler crash.

  successors: %bb.26
  liveins: $r14
    ST_FPrr $st0, implicit-def $fpsw, implicit $fpcw
    renamable $rdi = MOV64ri @.str.3.16422
    renamable $rdx = LEA64r %stack.6, 1, $noreg, 0, $noreg
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def dead
    $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
    dead $esi = MOV32r0 implicit-def dead $eflags, implicit-def $rsi
    CALL64pcrel32 @foo, implicit $rsp, implicit $ssp, implicit $rdi,
    implicit $rsi, implicit $rdx, implicit-def dead $fp0
    renamable $xmm0 = MOVSDrm_alt %stack.10, 1, $noreg, 0, $noreg :: (load 8
    from %stack.10)
    ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def dead $eflags,
    implicit-def $ssp, implicit $rsp, implicit $ssp
    renamable $fp2 = CHS_Fp80 killed undef renamable $fp0, implicit-def
    $fpsw
    JMP_1 %bb.26
The CALL64pcrel32 marks fp0 dead, so LLVM frees the stack slot for fp0
and the stack becomes empty. The later instruction CHS_Fp80 uses the
undefined register fp0; the original code assumes there must be a stack
slot for the source register (fp0) without taking into account that it
is undefined, so LLVM reports an error.

We had some discussion in https://reviews.llvm.org/D104440 and
decided to fix it in fast ISel. The fix is to lower an undefined fp value
to a zero value, which relieves the "X86 FP Stackifier" pass of this burden.
Thanks to Craig for the suggestion and the initial patch to fix it.

Differential Revision: https://reviews.llvm.org/D104678

Added: 
    llvm/test/CodeGen/X86/fast-isel-undef-fp.ll

Modified: 
    llvm/lib/Target/X86/X86FastISel.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 44670a9e0f31..bb95ed3ccdc5 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3842,6 +3842,31 @@ unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
     return X86MaterializeFP(CFP, VT);
   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
     return X86MaterializeGV(GV, VT);
+  else if (isa<UndefValue>(C)) {
+    unsigned Opc = 0;
+    switch (VT.SimpleTy) {
+    default:
+      break;
+    case MVT::f32:
+      if (!X86ScalarSSEf32)
+        Opc = X86::LD_Fp032;
+      break;
+    case MVT::f64:
+      if (!X86ScalarSSEf64)
+        Opc = X86::LD_Fp064;
+      break;
+    case MVT::f80:
+      Opc = X86::LD_Fp080;
+      break;
+    }
+
+    if (Opc) {
+      Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+              ResultReg);
+      return ResultReg;
+    }
+  }
 
   return 0;
 }

diff  --git a/llvm/test/CodeGen/X86/fast-isel-undef-fp.ll b/llvm/test/CodeGen/X86/fast-isel-undef-fp.ll
new file mode 100644
index 000000000000..3611cbca54f3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fast-isel-undef-fp.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -verify-machineinstrs -fast-isel=true -mattr=-sse | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local x86_fp80 @test_f80() {
+; CHECK-LABEL: test_f80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fldz
+; CHECK-NEXT:    fchs
+; CHECK-NEXT:    retq
+entry:
+  %fneg1 = fneg contract x86_fp80 undef
+  br label %exit
+
+exit:                                             ; preds = %entry
+  ret x86_fp80 %fneg1
+}
+
+define dso_local void @test_f32(float *%p) {
+; CHECK-LABEL: test_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fldz
+; CHECK-NEXT:    fchs
+; CHECK-NEXT:    fstps (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %fneg1 = fneg contract float undef
+  br label %exit
+
+exit:                                             ; preds = %entry
+  store float %fneg1, float *%p
+  ret void
+}
+
+define dso_local void @test_f64(double *%p) {
+; CHECK-LABEL: test_f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fldz
+; CHECK-NEXT:    fchs
+; CHECK-NEXT:    fstpl (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %fneg1 = fneg contract double undef
+  br label %exit
+
+exit:                                             ; preds = %entry
+  store double %fneg1, double *%p
+  ret void
+}


        


More information about the llvm-commits mailing list