[llvm] 4e9ae72 - [MachineCSE] Add a test with multiple blocks between instructions.
Serguei Katkov via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 18 01:33:01 PDT 2023
Author: Serguei Katkov
Date: 2023-04-18T15:32:50+07:00
New Revision: 4e9ae7255b7afcda53551d6fa2e9fa3acf77b1c9
URL: https://github.com/llvm/llvm-project/commit/4e9ae7255b7afcda53551d6fa2e9fa3acf77b1c9
DIFF: https://github.com/llvm/llvm-project/commit/4e9ae7255b7afcda53551d6fa2e9fa3acf77b1c9.diff
LOG: [MachineCSE] Add a test with multiple blocks between instructions.
Due to the limitation that we consider only a single predecessor,
we cannot generate code like
ucomiss
jp
ja
jne
Added:
llvm/test/CodeGen/X86/cse-two-preds.mir
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/cse-two-preds.mir b/llvm/test/CodeGen/X86/cse-two-preds.mir
new file mode 100644
index 000000000000..bc1bad5f3daf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cse-two-preds.mir
@@ -0,0 +1,210 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=x86_64 -verify-machineinstrs --run-pass=machine-cse -o - %s | FileCheck %s
+--- |
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+ define float @max(float noundef %a, float noundef %b) #0 {
+ entry:
+ %U = fcmp uno float %a, %b ; true when either operand is NaN (unordered)
+ br i1 %U, label %UL, label %NU
+
+ NU: ; preds = %entry
+ %GT = fcmp ogt float %a, %b ; ordered greater-than, same operands as %U
+ br i1 %GT, label %EXIT, label %NGT
+
+ NGT: ; preds = %NU
+ %LT = fcmp one float %a, %b ; ordered not-equal; with %GT already false this means %a < %b
+ br i1 %LT, label %EXIT, label %EQ
+
+ EQ: ; preds = %NGT
+ %bc = bitcast float %a to i32 ; reinterpret the bits of %a as an integer
+ %cmp = icmp slt i32 %bc, 0 ; true when the sign bit of %a is set
+ %eq = select i1 %cmp, float %a, float %b
+ br label %EXIT
+
+ UL: ; preds = %entry
+ %AU = fcmp uno float %a, %a ; comparing %a with itself: true only when %a is NaN
+ br i1 %AU, label %EXIT, label %ULB
+
+ ULB: ; preds = %UL
+ br label %EXIT
+
+ EXIT: ; preds = %ULB, %UL, %EQ, %NGT, %NU
+ %res = phi float [ %a, %NU ], [ %b, %NGT ], [ %a, %UL ], [ %eq, %EQ ], [ %b, %ULB ]
+ ret float %res
+ }
+
+ attributes #0 = { "target-cpu"="skylake" }
+
+...
+---
+name: max
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: true
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: fr32, preferred-register: '' }
+ - { id: 1, class: fr32, preferred-register: '' }
+ - { id: 2, class: fr32, preferred-register: '' }
+ - { id: 3, class: fr32, preferred-register: '' }
+ - { id: 4, class: gr32, preferred-register: '' }
+liveins:
+ - { reg: '$xmm0', virtual-reg: '%2' }
+ - { reg: '$xmm1', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: max
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.6(0x00000800), %bb.1(0x7ffff800)
+ ; CHECK-NEXT: liveins: $xmm0, $xmm1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fr32 = COPY $xmm0
+ ; CHECK-NEXT: nofpexcept VUCOMISSrr [[COPY1]], [[COPY]], implicit-def $eflags, implicit $mxcsr
+ ; CHECK-NEXT: JCC_1 %bb.6, 10, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.NU:
+ ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: JCC_1 %bb.8, 7, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.NGT:
+ ; CHECK-NEXT: successors: %bb.8(0x50000000), %bb.3(0x30000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: nofpexcept VUCOMISSrr [[COPY1]], [[COPY]], implicit-def $eflags, implicit $mxcsr
+ ; CHECK-NEXT: JCC_1 %bb.8, 5, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.EQ:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[VMOVSS2DIrr:%[0-9]+]]:gr32 = VMOVSS2DIrr [[COPY1]]
+ ; CHECK-NEXT: TEST32rr [[VMOVSS2DIrr]], [[VMOVSS2DIrr]], implicit-def $eflags
+ ; CHECK-NEXT: JCC_1 %bb.5, 8, implicit $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.EQ:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.EQ:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fr32 = PHI [[COPY]], %bb.4, [[COPY1]], %bb.3
+ ; CHECK-NEXT: JMP_1 %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.UL:
+ ; CHECK-NEXT: successors: %bb.8(0x00000800), %bb.7(0x7ffff800)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: nofpexcept VUCOMISSrr [[COPY1]], [[COPY1]], implicit-def $eflags, implicit $mxcsr
+ ; CHECK-NEXT: JCC_1 %bb.8, 10, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7.ULB:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8.EXIT:
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:fr32 = PHI [[COPY1]], %bb.1, [[COPY]], %bb.2, [[PHI]], %bb.5, [[COPY1]], %bb.6, [[COPY]], %bb.7
+ ; CHECK-NEXT: $xmm0 = COPY [[PHI1]]
+ ; CHECK-NEXT: RET 0, $xmm0
+ bb.0.entry:
+ successors: %bb.4(0x00000800), %bb.1(0x7ffff800)
+ liveins: $xmm0, $xmm1
+
+ %3:fr32 = COPY $xmm1
+ %2:fr32 = COPY $xmm0
+ nofpexcept VUCOMISSrr %2, %3, implicit-def $eflags, implicit $mxcsr ; first of three identical compares of %2 and %3
+ JCC_1 %bb.4, 10, implicit $eflags ; NOTE(review): condition 10 is presumably the jp from the commit description
+ JMP_1 %bb.1
+
+ bb.1.NU:
+ successors: %bb.6(0x40000000), %bb.2(0x40000000)
+
+ nofpexcept VUCOMISSrr %2, %3, implicit-def $eflags, implicit $mxcsr ; expected to be removed by machine-cse; the CHECK lines make $eflags a live-in of this block instead
+ JCC_1 %bb.6, 7, implicit $eflags ; NOTE(review): condition 7 is presumably the ja from the commit description
+ JMP_1 %bb.2
+
+ bb.2.NGT:
+ successors: %bb.6(0x50000000), %bb.3(0x30000000)
+
+ nofpexcept VUCOMISSrr %2, %3, implicit-def $eflags, implicit $mxcsr ; expected to be kept: per the CHECK lines this duplicate, two blocks away from bb.0, is not eliminated
+ JCC_1 %bb.6, 5, implicit $eflags ; NOTE(review): condition 5 is presumably the jne from the commit description
+ JMP_1 %bb.3
+
+ bb.3.EQ:
+ successors: %bb.7(0x40000000), %bb.8(0x40000000)
+
+ %4:gr32 = VMOVSS2DIrr %2
+ TEST32rr %4, %4, implicit-def $eflags ; redefines $eflags from the integer copy of %2
+ JCC_1 %bb.8, 8, implicit $eflags
+
+ bb.7.EQ:
+ successors: %bb.8(0x80000000)
+
+
+ bb.8.EQ:
+ successors: %bb.6(0x80000000)
+
+ %0:fr32 = PHI %3, %bb.7, %2, %bb.3
+ JMP_1 %bb.6
+
+ bb.4.UL:
+ successors: %bb.6(0x00000800), %bb.5(0x7ffff800)
+
+ nofpexcept VUCOMISSrr %2, %2, implicit-def $eflags, implicit $mxcsr ; different operands (%2 with itself), so not a duplicate of the compare in bb.0
+ JCC_1 %bb.6, 10, implicit $eflags
+ JMP_1 %bb.5
+
+ bb.5.ULB:
+ successors: %bb.6(0x80000000)
+
+
+ bb.6.EXIT:
+ %1:fr32 = PHI %2, %bb.1, %3, %bb.2, %0, %bb.8, %2, %bb.4, %3, %bb.5
+ $xmm0 = COPY %1
+ RET 0, $xmm0
+
+...
More information about the llvm-commits
mailing list