[llvm] 8224486 - [Hexagon] Fix MachineSink not to hoist FP instructions that update USR.

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 4 15:56:15 PST 2022


Author: Sumanth Gundapaneni
Date: 2022-01-04T15:55:22-08:00
New Revision: 822448635edc95cdf742e7b86630dc24c239032c

URL: https://github.com/llvm/llvm-project/commit/822448635edc95cdf742e7b86630dc24c239032c
DIFF: https://github.com/llvm/llvm-project/commit/822448635edc95cdf742e7b86630dc24c239032c.diff

LOG: [Hexagon] Fix MachineSink not to hoist FP instructions that update USR.

Ideally we should make USR as Def for these floating point instructions.
However, it violates some assembler MCChecker rules. This patch fixes
the issue by marking these FP instructions as non-sinkable.

Added: 
    llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir

Modified: 
    llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
    llvm/lib/Target/Hexagon/HexagonInstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 1aedff9a2cc3..a38e43709132 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -171,6 +171,23 @@ bool HexagonInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
   return MI.isAsCheapAsAMove();
 }
 
+// Do not sink floating point instructions that updates USR register.
+// Example:
+//    feclearexcept
+//    F2_conv_w2sf
+//    fetestexcept
+// MachineSink sinks F2_conv_w2sf and we are not able to catch exceptions.
+// TODO: On some of these floating point instructions, USR is marked as Use.
+// In reality, these instructions also Def the USR. If USR is marked as Def,
+// some of the assumptions in assembler packetization are broken.
+bool HexagonInstrInfo::shouldSink(const MachineInstr &MI) const {
+  // Assumption: A floating point instruction that reads the USR will write
+  // the USR as well.
+  if (isFloat(MI) && MI.hasRegisterImplicitUseOperand(Hexagon::USR))
+    return false;
+  return true;
+}
+
 /// Find the hardware loop instruction used to set-up the specified loop.
 /// On Hexagon, we have two instructions used to set-up the hardware loop
 /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions

diff  --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 05cdf6c98643..2af09c857d86 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -337,6 +337,12 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   bool isTailCall(const MachineInstr &MI) const override;
   bool isAsCheapAsAMove(const MachineInstr &MI) const override;
 
+  // Return true if the instruction should be sunk by MachineSink.
+  // MachineSink determines on its own whether the instruction is safe to sink;
+  // this gives the target a hook to override the default behavior with regards
+  // to which instructions should be sunk.
+  bool shouldSink(const MachineInstr &MI) const override;
+
   /// HexagonInstrInfo specifics.
 
   unsigned createVR(MachineFunction *MF, MVT VT) const;

diff  --git a/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir b/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir
new file mode 100644
index 000000000000..ba023bde9251
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir
@@ -0,0 +1,325 @@
+# RUN: llc -march=hexagon -run-pass machine-sink -o - %s | FileCheck %s
+
+# Test that MachineSink does not sink F2_conv_w2sf.
+# CHECK: name:{{.*}} main
+# CHECK: J2_call @feclearexcept
+# CHECK: F2_conv_w2sf
+# CHECK: J2_call @fetestexcept
+--- |
+  target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+  target triple = "hexagon"
+
+  @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+  ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+  define dso_local i32 @syst_int32_to_float32(i32 %a) local_unnamed_addr #0 {
+  entry:
+    %conv = sitofp i32 %a to float
+    %0 = bitcast float %conv to i32
+    ret i32 %0
+  }
+
+  ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+  declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+  ; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
+  declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+  ; Function Attrs: nounwind
+  define dso_local i32 @main() local_unnamed_addr #2 {
+  entry:
+    %a = alloca i32, align 4
+    %b = alloca i32, align 4
+    %c = alloca i32, align 4
+    %a.0.a.0.a.0.a.0..sroa_cast = bitcast i32* %a to i8*
+    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %a.0.a.0.a.0.a.0..sroa_cast)
+    store volatile i32 -16777235, i32* %a, align 4, !tbaa !3
+    %b.0.b.0.b.0.b.0..sroa_cast = bitcast i32* %b to i8*
+    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %b.0.b.0.b.0.b.0..sroa_cast)
+    store volatile i32 34, i32* %b, align 4, !tbaa !3
+    %c.0.c.0.c.0.c.0..sroa_cast = bitcast i32* %c to i8*
+    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %c.0.c.0.c.0.c.0..sroa_cast)
+    store volatile i32 34, i32* %c, align 4, !tbaa !3
+    %b.0.b.0.b.0.b.0.29 = load volatile i32, i32* %b, align 4, !tbaa !3
+    %cmp30 = icmp sgt i32 %b.0.b.0.b.0.b.0.29, 0
+    br i1 %cmp30, label %for.body, label %if.end
+
+  for.cond.for.cond.cleanup_crit_edge:              ; preds = %for.body
+    %conv.i.le = sitofp i32 %a.0.a.0.a.0.a.0. to float
+    %0 = bitcast float %conv.i.le to i32
+    %phi.cmp = icmp ugt i32 %0, 100
+    br i1 %phi.cmp, label %if.then, label %if.end
+
+  for.body:                                         ; preds = %entry, %for.body
+    %i.031 = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
+    %c.0.c.0.c.0.c.0. = load volatile i32, i32* %c, align 4, !tbaa !3
+    %inc = add nsw i32 %c.0.c.0.c.0.c.0., 1
+    store volatile i32 %inc, i32* %c, align 4, !tbaa !3
+    %call = tail call i32 @feclearexcept(i32 31) #5
+    %a.0.a.0.a.0.a.0. = load volatile i32, i32* %a, align 4, !tbaa !3
+    %call2 = tail call i32 @fetestexcept(i32 31) #5
+    %call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %call2) #5
+    %inc4 = add nuw nsw i32 %i.031, 1
+    %b.0.b.0.b.0.b.0. = load volatile i32, i32* %b, align 4, !tbaa !3
+    %cmp = icmp slt i32 %inc4, %b.0.b.0.b.0.b.0.
+    br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge, !llvm.loop !7
+
+  if.then:                                          ; preds = %for.cond.for.cond.cleanup_crit_edge
+    %a.0.a.0.a.0.a.0.23 = load volatile i32, i32* %a, align 4, !tbaa !3
+    %b.0.b.0.b.0.b.0.20 = load volatile i32, i32* %b, align 4, !tbaa !3
+    %add = add nsw i32 %b.0.b.0.b.0.b.0.20, %a.0.a.0.a.0.a.0.23
+    %c.0.c.0.c.0.c.0.17 = load volatile i32, i32* %c, align 4, !tbaa !3
+    %add7 = add nsw i32 %add, %c.0.c.0.c.0.c.0.17
+    br label %cleanup
+
+  if.end:                                           ; preds = %entry, %for.cond.for.cond.cleanup_crit_edge
+    %a.0.a.0.a.0.a.0.24 = load volatile i32, i32* %a, align 4, !tbaa !3
+    %b.0.b.0.b.0.b.0.21 = load volatile i32, i32* %b, align 4, !tbaa !3
+    %mul.neg = mul i32 %b.0.b.0.b.0.b.0.21, -6
+    %sub = add i32 %mul.neg, %a.0.a.0.a.0.a.0.24
+    %c.0.c.0.c.0.c.0.18 = load volatile i32, i32* %c, align 4, !tbaa !3
+    %mul8 = mul nsw i32 %c.0.c.0.c.0.c.0.18, 3
+    %add9 = add nsw i32 %sub, %mul8
+    br label %cleanup
+
+  cleanup:                                          ; preds = %if.end, %if.then
+    %retval.0 = phi i32 [ %add7, %if.then ], [ %add9, %if.end ]
+    %1 = bitcast i32* %c to i8*
+    %2 = bitcast i32* %b to i8*
+    %3 = bitcast i32* %a to i8*
+    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1)
+    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2)
+    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3)
+    ret i32 %retval.0
+  }
+
+  declare dso_local i32 @feclearexcept(i32) local_unnamed_addr #3
+
+  declare dso_local i32 @fetestexcept(i32) local_unnamed_addr #3
+
+  ; Function Attrs: nofree nounwind
+  declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #4
+
+  attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
+  attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
+  attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
+  attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
+  attributes #4 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
+  attributes #5 = { nounwind }
+
+  !llvm.module.flags = !{!0, !1}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 7, !"frame-pointer", i32 2}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+  !7 = distinct !{!7, !8}
+  !8 = !{!"llvm.loop.mustprogress"}
+
+...
+---
+name:            syst_int32_to_float32
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+  - { id: 1, class: intregs, preferred-register: '' }
+liveins:
+  - { reg: '$r0', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $r0
+
+    %0:intregs = COPY $r0
+    %1:intregs = F2_conv_w2sf %0, implicit $usr
+    $r0 = COPY %1
+    PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+...
+---
+name:            main
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: intregs, preferred-register: '' }
+  - { id: 1, class: intregs, preferred-register: '' }
+  - { id: 2, class: intregs, preferred-register: '' }
+  - { id: 3, class: intregs, preferred-register: '' }
+  - { id: 4, class: intregs, preferred-register: '' }
+  - { id: 5, class: intregs, preferred-register: '' }
+  - { id: 6, class: intregs, preferred-register: '' }
+  - { id: 7, class: intregs, preferred-register: '' }
+  - { id: 8, class: predregs, preferred-register: '' }
+  - { id: 9, class: intregs, preferred-register: '' }
+  - { id: 10, class: intregs, preferred-register: '' }
+  - { id: 11, class: intregs, preferred-register: '' }
+  - { id: 12, class: intregs, preferred-register: '' }
+  - { id: 13, class: intregs, preferred-register: '' }
+  - { id: 14, class: intregs, preferred-register: '' }
+  - { id: 15, class: intregs, preferred-register: '' }
+  - { id: 16, class: predregs, preferred-register: '' }
+  - { id: 17, class: intregs, preferred-register: '' }
+  - { id: 18, class: predregs, preferred-register: '' }
+  - { id: 19, class: intregs, preferred-register: '' }
+  - { id: 20, class: intregs, preferred-register: '' }
+  - { id: 21, class: intregs, preferred-register: '' }
+  - { id: 22, class: intregs, preferred-register: '' }
+  - { id: 23, class: intregs, preferred-register: '' }
+  - { id: 24, class: intregs, preferred-register: '' }
+  - { id: 25, class: intregs, preferred-register: '' }
+  - { id: 26, class: intregs, preferred-register: '' }
+  - { id: 27, class: intregs, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: a, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: b, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: c, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.6(0x50000000), %bb.4(0x30000000)
+
+    S4_storeiri_io %stack.0.a, 0, -16777235 :: (volatile store (s32) into %ir.a, !tbaa !3)
+    S4_storeiri_io %stack.1.b, 0, 34 :: (volatile store (s32) into %ir.b, !tbaa !3)
+    S4_storeiri_io %stack.2.c, 0, 34 :: (volatile store (s32) into %ir.c, !tbaa !3)
+    %7:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
+    %8:predregs = C2_cmpgti %7, 0
+    %6:intregs = A2_tfrsi 0
+    J2_jumpf %8, %bb.4, implicit-def $pc
+
+  bb.6:
+    successors: %bb.2(0x80000000)
+
+    %9:intregs = A2_tfrsi 31
+    %13:intregs = A2_tfrsi @.str
+    J2_jump %bb.2, implicit-def $pc
+
+  bb.1.for.cond.for.cond.cleanup_crit_edge:
+    successors: %bb.4(0x40000000)
+
+    J2_jump %bb.4, implicit-def dead $pc
+
+  bb.2.for.body:
+    successors: %bb.2(0x7c000000), %bb.1(0x04000000)
+
+    %0:intregs = PHI %6, %bb.6, %2, %bb.2
+    L4_iadd_memopw_io %stack.2.c, 0, 1 :: (volatile store (s32) into %ir.c, !tbaa !3), (volatile dereferenceable load (s32) from %ir.c, !tbaa !3)
+    ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
+    $r0 = COPY %9
+    J2_call @feclearexcept, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
+    %1:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3)
+    ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
+    %17:intregs = F2_conv_w2sf %1, implicit $usr
+    $r0 = COPY %9
+    J2_call @fetestexcept, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
+    %11:intregs = COPY $r0
+    %12:intregs = COPY $r29
+    S2_storeri_io %12, 0, %11 :: (store (s32) into stack)
+    ADJCALLSTACKDOWN 4, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
+    $r0 = COPY %13
+    J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
+    ADJCALLSTACKUP 4, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
+    %2:intregs = nuw nsw A2_addi %0, 1
+    %15:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
+    %16:predregs = C2_cmpgt %15, %2
+    J2_jumpt %16, %bb.2, implicit-def dead $pc
+    J2_jump %bb.1, implicit-def dead $pc
+
+  bb.3.if.then:
+    successors: %bb.5(0x80000000)
+
+    %18:predregs = C2_cmpgtui %17, 100
+    %24:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3)
+    %25:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
+    %26:intregs = L2_loadri_io %stack.2.c, 0 :: (volatile dereferenceable load (s32) from %ir.c, !tbaa !3)
+    %3:intregs = nsw M2_acci %26, %25, %24
+    J2_jumpf %18, %bb.5, implicit-def dead $pc
+    J2_jump %bb.5, implicit-def dead $pc
+
+  bb.4.if.end:
+    successors: %bb.5(0x80000000)
+
+    %19:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3)
+    %20:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
+    %27:intregs = M2_macsin %19, %20, 6
+    %23:intregs = L2_loadri_io %stack.2.c, 0 :: (volatile dereferenceable load (s32) from %ir.c, !tbaa !3)
+    %4:intregs = nsw M2_macsip %27, %23, 3
+
+  bb.5.cleanup:
+    %5:intregs = PHI %4, %bb.4, %3, %bb.3
+    $r0 = COPY %5
+    PS_jmpret $r31, implicit-def dead $pc, implicit $r0
+
+...


        


More information about the llvm-commits mailing list