[llvm] Imprecise switch case (PR #82795)

Fri Feb 23 09:04:45 PST 2024

https://github.com/Acim-Maravic created https://github.com/llvm/llvm-project/pull/82795

The next step is to gather all blocks that can be safely merged into block 21, so that I can create selects and allow non-constant values in the helpers.

>From 24742e2a1c260bf44965c619cab613df8e0a5be2 Mon Sep 17 00:00:00 2001
From: Acim Maravic <acim.maravic at syrmia.com>
Date: Fri, 23 Feb 2024 16:47:20 +0100
Subject: [PATCH 1/2] [NFC][LLVM] Imprecise code for switch case lowering

Added tests that will show changes.
---
 .../SimplifyCFG/AMDGPU/switch-to-select.ll    | 317 ++++++++++++++++++
 1 file changed, 317 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll b/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll
new file mode 100644
index 00000000000000..c8fef79352e75b
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll
@@ -0,0 +1,317 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=simplifycfg < %s | FileCheck -check-prefix=ALL %s
+
+
+define float @SimpleTestTwoCasesAndDefault(<2 x float> noundef %PerspInterpCenter, i32 inreg noundef %PrimMask) {
+; ALL-LABEL: @SimpleTestTwoCasesAndDefault(
+; ALL-NEXT:  .entry:
+; ALL-NEXT:    [[PERSPINTERPCENTER_I1:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER:%.*]], i64 1
+; ALL-NEXT:    [[PERSPINTERPCENTER_I0:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER]], i64 0
+; ALL-NEXT:    [[TMP0:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK:%.*]])
+; ALL-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP0]], float [[PERSPINTERPCENTER_I1]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], 3.000000e+00
+; ALL-NEXT:    [[TMP3:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP2]])
+; ALL-NEXT:    [[TMP4:%.*]] = fptosi float [[TMP3]] to i32
+; ALL-NEXT:    [[DOTFR:%.*]] = freeze i32 [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[DOTFR]], 1
+; ALL-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]]
+; ALL:       6:
+; ALL-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP7]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP9:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP8]], 4.000000e+00
+; ALL-NEXT:    [[TMP10:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP9]])
+; ALL-NEXT:    [[TMP11:%.*]] = fptosi float [[TMP10]] to i32
+; ALL-NEXT:    [[COND_FREEZE1:%.*]] = freeze i32 [[TMP11]]
+; ALL-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 [[COND_FREEZE1]], 1
+; ALL-NEXT:    [[SWITCH_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP]], float 2.000000e+00, float 4.000000e+00
+; ALL-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 [[COND_FREEZE1]], 0
+; ALL-NEXT:    [[SWITCH_SELECT2:%.*]] = select i1 [[SWITCH_SELECTCMP1]], float 1.000000e+00, float [[SWITCH_SELECT]]
+; ALL-NEXT:    br label [[TMP12]]
+; ALL:       12:
+; ALL-NEXT:    [[SAMPLEPOS_1_I0:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY:%.*]] ], [ [[SWITCH_SELECT2]], [[TMP6]] ]
+; ALL-NEXT:    ret float [[SAMPLEPOS_1_I0]]
+;
+.entry:
+  %PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
+  %PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
+  %0 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %1 = call float @llvm.amdgcn.interp.p2(float %0, float %PerspInterpCenter.i1, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %2 = fmul reassoc nnan nsz arcp contract afn float %1, 3.000000e00
+  %3 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %2)
+  %4 = fptosi float %3 to i32
+  %.fr = freeze i32 %4
+  %5 = icmp eq i32 %.fr, 1
+  br i1 %5, label %6, label %14
+
+6:                                                ; preds = %.entry
+  %7 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %8 = call float @llvm.amdgcn.interp.p2(float %7, float %PerspInterpCenter.i1, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %9 = fmul reassoc nnan nsz arcp contract afn float %8, 4.000000e00
+  %10 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %9)
+  %11 = fptosi float %10 to i32
+  %cond.freeze1 = freeze i32 %11
+  switch i32 %cond.freeze1, label %14 [
+  i32 0, label %12
+  i32 1, label %13
+  ]
+
+12:                                               ; preds = %6
+  br label %14
+
+13:                                               ; preds = %6
+  br label %14
+
+14:                                               ; preds = %12, %13, %6, %.entry
+  %samplePos.1.i0 = phi float [ 0.000000e00, %.entry ], [ 2.000000e00, %13 ], [ 1.000000e00, %12 ], [ 4.000000e00, %6 ]
+  ret float %samplePos.1.i0
+}
+
+define float @SimpleTestTwoCases(<2 x float> noundef %PerspInterpCenter, i32 inreg noundef %PrimMask) {
+; ALL-LABEL: @SimpleTestTwoCases(
+; ALL-NEXT:  .entry:
+; ALL-NEXT:    [[PERSPINTERPCENTER_I1:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER:%.*]], i64 1
+; ALL-NEXT:    [[PERSPINTERPCENTER_I0:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER]], i64 0
+; ALL-NEXT:    [[TMP0:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK:%.*]])
+; ALL-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP0]], float [[PERSPINTERPCENTER_I1]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], 3.000000e+00
+; ALL-NEXT:    [[TMP3:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP2]])
+; ALL-NEXT:    [[TMP4:%.*]] = fptosi float [[TMP3]] to i32
+; ALL-NEXT:    [[DOTFR:%.*]] = freeze i32 [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[DOTFR]], 1
+; ALL-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]]
+; ALL:       6:
+; ALL-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP7]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP9:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP8]], 4.000000e+00
+; ALL-NEXT:    [[TMP10:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP9]])
+; ALL-NEXT:    [[TMP11:%.*]] = fptosi float [[TMP10]] to i32
+; ALL-NEXT:    [[COND_FREEZE1:%.*]] = freeze i32 [[TMP11]]
+; ALL-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 [[COND_FREEZE1]], 1
+; ALL-NEXT:    [[SWITCH_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP]], float 2.000000e+00, float 0.000000e+00
+; ALL-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 [[COND_FREEZE1]], 0
+; ALL-NEXT:    [[SWITCH_SELECT2:%.*]] = select i1 [[SWITCH_SELECTCMP1]], float 1.000000e+00, float [[SWITCH_SELECT]]
+; ALL-NEXT:    br label [[TMP12]]
+; ALL:       12:
+; ALL-NEXT:    [[SAMPLEPOS_1_I0:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY:%.*]] ], [ [[SWITCH_SELECT2]], [[TMP6]] ]
+; ALL-NEXT:    ret float [[SAMPLEPOS_1_I0]]
+;
+.entry:
+  %PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
+  %PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
+  %0 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %1 = call float @llvm.amdgcn.interp.p2(float %0, float %PerspInterpCenter.i1, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %2 = fmul reassoc nnan nsz arcp contract afn float %1, 3.000000e00
+  %3 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %2)
+  %4 = fptosi float %3 to i32
+  %.fr = freeze i32 %4
+  %5 = icmp eq i32 %.fr, 1
+  br i1 %5, label %6, label %14
+
+6:                                                ; preds = %.entry
+  %7 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %8 = call float @llvm.amdgcn.interp.p2(float %7, float %PerspInterpCenter.i1, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %9 = fmul reassoc nnan nsz arcp contract afn float %8, 4.000000e00
+  %10 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %9)
+  %11 = fptosi float %10 to i32
+  %cond.freeze1 = freeze i32 %11
+  switch i32 %cond.freeze1, label %14 [
+  i32 0, label %12
+  i32 1, label %13
+  ]
+
+12:                                               ; preds = %6
+  br label %14
+
+13:                                               ; preds = %6
+  br label %14
+
+14:                                               ; preds = %6, %12, %13, %.entry
+  %samplePos.1.i0 = phi float [ 0.000000e00, %.entry ], [ 0.000000e00, %6 ], [ 2.000000e00, %13 ], [ 1.000000e00, %12 ]
+  ret float %samplePos.1.i0
+}
+
+
+
+define float @SimpleTestSwitch(<2 x float> noundef %PerspInterpCenter, i32 inreg noundef %PrimMask) {
+; ALL-LABEL: @SimpleTestSwitch(
+; ALL-NEXT:  .entry:
+; ALL-NEXT:    [[PERSPINTERPCENTER_I1:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER:%.*]], i64 1
+; ALL-NEXT:    [[PERSPINTERPCENTER_I0:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER]], i64 0
+; ALL-NEXT:    [[TMP0:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK:%.*]])
+; ALL-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP0]], float [[PERSPINTERPCENTER_I1]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], 3.000000e+00
+; ALL-NEXT:    [[TMP3:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP2]])
+; ALL-NEXT:    [[TMP4:%.*]] = fptosi float [[TMP3]] to i32
+; ALL-NEXT:    [[DOTFR:%.*]] = freeze i32 [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[DOTFR]], 1
+; ALL-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP16:%.*]]
+; ALL:       6:
+; ALL-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP7]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP9:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP8]], 4.000000e+00
+; ALL-NEXT:    [[TMP10:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP9]])
+; ALL-NEXT:    [[TMP11:%.*]] = fptosi float [[TMP10]] to i32
+; ALL-NEXT:    [[COND_FREEZE1:%.*]] = freeze i32 [[TMP11]]
+; ALL-NEXT:    switch i32 [[COND_FREEZE1]], label [[TMP16]] [
+; ALL-NEXT:      i32 0, label [[TMP12:%.*]]
+; ALL-NEXT:      i32 1, label [[TMP13:%.*]]
+; ALL-NEXT:      i32 2, label [[TMP14:%.*]]
+; ALL-NEXT:      i32 3, label [[TMP15:%.*]]
+; ALL-NEXT:    ]
+; ALL:       12:
+; ALL-NEXT:    br label [[TMP16]]
+; ALL:       13:
+; ALL-NEXT:    br label [[TMP16]]
+; ALL:       14:
+; ALL-NEXT:    br label [[TMP16]]
+; ALL:       15:
+; ALL-NEXT:    br label [[TMP16]]
+; ALL:       16:
+; ALL-NEXT:    [[SAMPLEPOS_1_I0:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY:%.*]] ], [ 0.000000e+00, [[TMP6]] ], [ 4.000000e+00, [[TMP15]] ], [ 3.000000e+00, [[TMP14]] ], [ 2.000000e+00, [[TMP13]] ], [ 1.000000e+00, [[TMP12]] ]
+; ALL-NEXT:    ret float [[SAMPLEPOS_1_I0]]
+;
+.entry:
+  %PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
+  %PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
+  %0 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %1 = call float @llvm.amdgcn.interp.p2(float %0, float %PerspInterpCenter.i1, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %2 = fmul reassoc nnan nsz arcp contract afn float %1, 3.000000e00
+  %3 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %2)
+  %4 = fptosi float %3 to i32
+  %.fr = freeze i32 %4
+  %5 = icmp eq i32 %.fr, 1
+  br i1 %5, label %6, label %16
+
+6:                                                ; preds = %.entry
+  %7 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %8 = call float @llvm.amdgcn.interp.p2(float %7, float %PerspInterpCenter.i1, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %9 = fmul reassoc nnan nsz arcp contract afn float %8, 4.000000e00
+  %10 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %9)
+  %11 = fptosi float %10 to i32
+  %cond.freeze1 = freeze i32 %11
+  switch i32 %cond.freeze1, label %16 [
+  i32 0, label %12
+  i32 1, label %13
+  i32 2, label %14
+  i32 3, label %15
+  ]
+
+12:                                               ; preds = %6
+  br label %16
+
+13:                                               ; preds = %6
+  br label %16
+
+14:                                               ; preds = %6
+  br label %16
+
+15:                                               ; preds = %6
+  br label %16
+
+16:                                               ; preds = %6, %12, %13, %14, %15, %.entry
+  %samplePos.1.i0 = phi float [ 0.000000e00, %.entry ], [ 0.000000e00, %6 ], [ 4.000000e00, %15 ], [ 3.000000e00, %14 ], [ 2.000000e00, %13 ], [ 1.000000e00, %12 ]
+  ret float %samplePos.1.i0
+}
+
+define float @LittleMoreComplexSwitch(<2 x float> noundef %PerspInterpCenter, i32 inreg noundef %PrimMask) {
+; ALL-LABEL: @LittleMoreComplexSwitch(
+; ALL-NEXT:  .entry:
+; ALL-NEXT:    [[PERSPINTERPCENTER_I1:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER:%.*]], i64 1
+; ALL-NEXT:    [[PERSPINTERPCENTER_I0:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER]], i64 0
+; ALL-NEXT:    [[TMP0:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK:%.*]])
+; ALL-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP0]], float [[PERSPINTERPCENTER_I1]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], 3.000000e+00
+; ALL-NEXT:    [[TMP3:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP2]])
+; ALL-NEXT:    [[TMP4:%.*]] = fptosi float [[TMP3]] to i32
+; ALL-NEXT:    [[DOTFR:%.*]] = freeze i32 [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[DOTFR]], 1
+; ALL-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP21:%.*]]
+; ALL:       6:
+; ALL-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP7]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
+; ALL-NEXT:    [[TMP9:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP8]], 4.000000e+00
+; ALL-NEXT:    [[TMP10:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP9]])
+; ALL-NEXT:    [[TMP11:%.*]] = fptosi float [[TMP10]] to i32
+; ALL-NEXT:    [[COND_FREEZE1:%.*]] = freeze i32 [[TMP11]]
+; ALL-NEXT:    [[TMP12:%.*]] = sitofp i32 [[COND_FREEZE1]] to float
+; ALL-NEXT:    [[TMP13:%.*]] = fsub reassoc nnan nsz arcp contract afn float [[TMP9]], [[TMP12]]
+; ALL-NEXT:    [[SCALE_I0:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP13]], 2.000000e+00
+; ALL-NEXT:    [[TMP14:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], 6.000000e+00
+; ALL-NEXT:    [[DOTI0:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[SCALE_I0]], -1.000000e+00
+; ALL-NEXT:    [[DOTI1:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[TMP14]], -3.000000e+00
+; ALL-NEXT:    switch i32 [[COND_FREEZE1]], label [[TMP21]] [
+; ALL-NEXT:      i32 0, label [[TMP15:%.*]]
+; ALL-NEXT:      i32 1, label [[TMP16:%.*]]
+; ALL-NEXT:      i32 2, label [[TMP17:%.*]]
+; ALL-NEXT:      i32 3, label [[TMP19:%.*]]
+; ALL-NEXT:    ]
+; ALL:       15:
+; ALL-NEXT:    br label [[TMP21]]
+; ALL:       16:
+; ALL-NEXT:    br label [[TMP21]]
+; ALL:       17:
+; ALL-NEXT:    [[TMP18:%.*]] = fneg reassoc nnan nsz arcp contract afn float [[DOTI0]]
+; ALL-NEXT:    br label [[TMP21]]
+; ALL:       19:
+; ALL-NEXT:    [[TMP20:%.*]] = fneg reassoc nnan nsz arcp contract afn float [[DOTI0]]
+; ALL-NEXT:    br label [[TMP21]]
+; ALL:       21:
+; ALL-NEXT:    [[SAMPLEPOS_1_I0:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY:%.*]] ], [ 0.000000e+00, [[TMP6]] ], [ [[TMP20]], [[TMP19]] ], [ 1.000000e+00, [[TMP17]] ], [ [[DOTI0]], [[TMP16]] ], [ 1.000000e+00, [[TMP15]] ]
+; ALL-NEXT:    [[SAMPLEPOS_1_I1:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY]] ], [ 0.000000e+00, [[TMP6]] ], [ [[DOTI1]], [[TMP19]] ], [ [[DOTI1]], [[TMP17]] ], [ [[DOTI1]], [[TMP16]] ], [ 1.000000e+00, [[TMP15]] ]
+; ALL-NEXT:    [[SAMPLEPOS_1_I2:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY]] ], [ 0.000000e+00, [[TMP6]] ], [ -1.000000e+00, [[TMP19]] ], [ [[TMP18]], [[TMP17]] ], [ 1.000000e+00, [[TMP16]] ], [ 1.000000e+00, [[TMP15]] ]
+; ALL-NEXT:    [[TMP22:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.cubesc(float [[SAMPLEPOS_1_I0]], float [[SAMPLEPOS_1_I1]], float [[SAMPLEPOS_1_I2]])
+; ALL-NEXT:    ret float [[TMP22]]
+;
+  .entry:
+  %PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
+  %PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
+  %0 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %1 = call float @llvm.amdgcn.interp.p2(float %0, float %PerspInterpCenter.i1, i32 immarg 1, i32 immarg 0, i32 %PrimMask) #1
+  %2 = fmul reassoc nnan nsz arcp contract afn float %1, 3.000000e+00
+  %3 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %2)
+  %4 = fptosi float %3 to i32
+  %.fr = freeze i32 %4
+  %5 = icmp eq i32 %.fr, 1
+  br i1 %5, label %6, label %21
+
+6:                                                ; preds = %.entry
+  %7 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %8 = call float @llvm.amdgcn.interp.p2(float %7, float %PerspInterpCenter.i1, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1
+  %9 = fmul reassoc nnan nsz arcp contract afn float %8, 4.000000e+00
+  %10 = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float %9)
+  %11 = fptosi float %10 to i32
+  %cond.freeze1 = freeze i32 %11
+  %12 = sitofp i32 %cond.freeze1 to float
+  %13 = fsub reassoc nnan nsz arcp contract afn float %9, %12
+  %scale.i0 = fmul reassoc nnan nsz arcp contract afn float %13, 2.000000e+00
+  %14 = fmul reassoc nnan nsz arcp contract afn float %1, 6.000000e+00
+  %.i0 = fadd reassoc nnan nsz arcp contract afn float %scale.i0, -1.000000e+00
+  %.i1 = fadd reassoc nnan nsz arcp contract afn float %14, -3.000000e+00
+  switch i32 %cond.freeze1, label %21 [
+  i32 0, label %15
+  i32 1, label %16
+  i32 2, label %17
+  i32 3, label %19
+  ]
+
+15:                                               ; preds = %6
+  br label %21
+
+16:                                               ; preds = %6
+  br label %21
+
+17:                                               ; preds = %6
+  %18 = fneg reassoc nnan nsz arcp contract afn float %.i0
+  br label %21
+
+19:                                               ; preds = %6
+  %20 = fneg reassoc nnan nsz arcp contract afn float %.i0
+  br label %21
+
+21:                                               ; preds = %6, %15, %16, %17, %19, %.entry
+  %samplePos.1.i0 = phi float [ 0.000000e+00, %.entry ], [ 0.000000e+00, %6 ], [ %20, %19 ], [ 1.000000e+00, %17 ], [ %.i0, %16 ], [ 1.000000e+00, %15 ]
+  %samplePos.1.i1 = phi float [ 0.000000e+00, %.entry ], [ 0.000000e+00, %6 ], [ %.i1, %19 ], [ %.i1, %17 ], [ %.i1, %16 ], [ 1.000000e+00, %15 ]
+  %samplePos.1.i2 = phi float [ 0.000000e+00, %.entry ], [ 0.000000e+00, %6 ], [ -1.000000e+00, %19 ], [ %18, %17 ], [ 1.000000e+00, %16 ], [ 1.000000e+00, %15 ]
+  %22 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.cubesc(float %samplePos.1.i0, float %samplePos.1.i1, float %samplePos.1.i2)
+  ret float %22
+}

>From c8f92644ca8fa05f2f63198caa9cef6339b982d7 Mon Sep 17 00:00:00 2001
From: Acim Maravic <acim.maravic at syrmia.com>
Date: Fri, 23 Feb 2024 17:56:15 +0100
Subject: [PATCH 2/2] [LLVM] Imprecise code for switch case lowering

I have added support for simple switch cases where all values in the PHI
node are constants, so such a switch can now be lowered into cmp + select.
---
 .../Transforms/Utils/SimplifyCFGOptions.h     |  5 ++
 llvm/lib/Passes/PassBuilder.cpp               |  2 +
 llvm/lib/Passes/PassRegistry.def              |  1 +
 .../lib/Transforms/Scalar/SimplifyCFGPass.cpp |  6 ++
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 63 ++++++++++++++++---
 .../SimplifyCFG/AMDGPU/switch-to-select.ll    | 29 ++++-----
 6 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 8008fc6e8422d3..cb3ef663408153 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -30,6 +30,7 @@ struct SimplifyCFGOptions {
   bool SinkCommonInsts = false;
   bool SimplifyCondBranch = true;
   bool SpeculateBlocks = true;
+  bool ConvertSwitchToSelect = false;
 
   AssumptionCache *AC = nullptr;
 
@@ -46,6 +47,10 @@ struct SimplifyCFGOptions {
     ConvertSwitchRangeToICmp = B;
     return *this;
   }
+  SimplifyCFGOptions &convertSwitchToSelect(bool B) {
+    ConvertSwitchToSelect = B;
+    return *this;
+  }
   SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
     ConvertSwitchToLookupTable = B;
     return *this;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c934ec42f6eb15..360dc4da3ca91d 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -819,6 +819,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
       Result.forwardSwitchCondToPhi(Enable);
     } else if (ParamName == "switch-range-to-icmp") {
       Result.convertSwitchRangeToICmp(Enable);
+    } else if (ParamName == "switch-to-select") {
+      Result.convertSwitchToSelect(Enable);
     } else if (ParamName == "switch-to-lookup") {
       Result.convertSwitchToLookupTable(Enable);
     } else if (ParamName == "keep-loops") {
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 44511800ccff8d..a3783afdf3be68 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -548,6 +548,7 @@ FUNCTION_PASS_WITH_PARAMS(
     "no-forward-switch-cond;forward-switch-cond;no-switch-range-to-icmp;"
     "switch-range-to-icmp;no-switch-to-lookup;switch-to-lookup;no-keep-loops;"
     "keep-loops;no-hoist-common-insts;hoist-common-insts;no-sink-common-insts;"
+    "switch-to-select;"
     "sink-common-insts;bonus-inst-threshold=N")
 FUNCTION_PASS_WITH_PARAMS(
     "speculative-execution", "SpeculativeExecutionPass",
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7017f6adf3a2bb..eafedef2af7d2e 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -61,6 +61,10 @@ static cl::opt<bool> UserSwitchRangeToICmp(
     cl::desc(
         "Convert switches into an integer range comparison (default = false)"));
 
+static cl::opt<bool> UserSwitchToSelect(
+    "switch-to-select", cl::Hidden, cl::init(false),
+    cl::desc("Convert switches into icmp + select (default = false)"));
+
 static cl::opt<bool> UserSwitchToLookup(
     "switch-to-lookup", cl::Hidden, cl::init(false),
     cl::desc("Convert switches to lookup tables (default = false)"));
@@ -323,6 +327,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
     Options.HoistCommonInsts = UserHoistCommonInsts;
   if (UserSinkCommonInsts.getNumOccurrences())
     Options.SinkCommonInsts = UserSinkCommonInsts;
+  if (UserSwitchToSelect.getNumOccurrences())
+    Options.ConvertSwitchToSelect = UserSwitchToSelect;
 }
 
 SimplifyCFGPass::SimplifyCFGPass() {
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 254795ec244534..3572c2fe68a538 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5916,7 +5916,7 @@ static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
       return false;
 
     // Only one value per case is permitted.
-    if (Results.size() > 1)
+    if (Results.size() > 3) // How many PHI instructions are handled
       return false;
 
     // Add the case->result mapping to UniqueResults.
@@ -5953,12 +5953,31 @@ static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
   return true;
 }
 
+Value *createSelectChain(Value *Condition, Constant *DefaultResult,
+                         const SwitchCaseResultVectorTy &ResultVector,
+                         unsigned StartIndex, IRBuilder<> &Builder) {
+  if (StartIndex >= ResultVector.size() && DefaultResult) {
+    return DefaultResult;
+  }
+
+  ConstantInt *CurrentCase = ResultVector[StartIndex].second[0];
+  Value *ValueCompare =
+      Builder.CreateICmpEQ(Condition, CurrentCase, "switch.selectcmp");
+
+  Value *NextSelect = createSelectChain(Condition, DefaultResult, ResultVector,
+                                        StartIndex + 1, Builder);
+
+  return Builder.CreateSelect(ValueCompare, ResultVector[StartIndex].first,
+                              NextSelect, "switch.select");
+}
+
 // Helper function that checks if it is possible to transform a switch with only
 // two cases (or two cases + default) that produces a result into a select.
 // TODO: Handle switches with more than 2 cases that map to the same result.
 static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                  Constant *DefaultResult, Value *Condition,
-                                 IRBuilder<> &Builder) {
+                                 IRBuilder<> &Builder,
+                                 bool IsComplexSwitchTransform = false) {
   // If we are selecting between only two cases transform into a simple
   // select or a two-way select if default is possible.
   // Example:
@@ -5967,6 +5986,22 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
   //   case 20: return 2;   ---->  %2 = icmp eq i32 %a, 20
   //   default: return 4;          %3 = select i1 %2, i32 2, i32 %1
   // }
+
+  if (IsComplexSwitchTransform) {
+    bool IsSizeOkay = true;
+
+    for (int i = 0; i < ResultVector.size(); i++)
+      if (ResultVector[i].second.size() != 1)
+        IsSizeOkay = false;
+
+    if (IsSizeOkay && ResultVector.size() > 2) {
+      Value *FinalSelect =
+          createSelectChain(Condition, DefaultResult, ResultVector, 0, Builder);
+      if (FinalSelect)
+        return FinalSelect;
+    }
+  }
+
   if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
       ResultVector[1].second.size() == 1) {
     ConstantInt *FirstCase = ResultVector[0].second[0];
@@ -6071,21 +6106,29 @@ static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
 /// switch with a select. Returns true if the fold was made.
 static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
                               DomTreeUpdater *DTU, const DataLayout &DL,
-                              const TargetTransformInfo &TTI) {
+                              const TargetTransformInfo &TTI,
+                              bool IsComplexSwitchTransform = false) {
   Value *const Cond = SI->getCondition();
   PHINode *PHI = nullptr;
   BasicBlock *CommonDest = nullptr;
   Constant *DefaultResult;
   SwitchCaseResultVectorTy UniqueResults;
   // Collect all the cases that will deliver the same value from the switch.
-  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
-                             DL, TTI, /*MaxUniqueResults*/ 2))
+  if (!initializeUniqueCases(
+          SI, PHI, CommonDest, UniqueResults, DefaultResult, DL, TTI,
+          /*MaxUniqueResults*/ 7)) // I think that the next step is to expand
+                                   // this function to return a list of basic
+                                   // blocks that can be merged
     return false;
 
   assert(PHI != nullptr && "PHI for value select not found");
   Builder.SetInsertPoint(SI);
-  Value *SelectValue =
-      foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
+  Value *SelectValue = foldSwitchToSelect(
+      UniqueResults, DefaultResult, Cond, Builder,
+      IsComplexSwitchTransform); //  Afterwards this function should just merge
+                                 //  these blocks with the predecessor of the
+                                 //  switch. Also, UniqueResults would no longer
+                                 //  be just constants
   if (!SelectValue)
     return false;
 
@@ -7028,7 +7071,11 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
     return requestResimplify();
 
-  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
+  bool IsSwitchToSelect = false;
+  if (Options.ConvertSwitchToSelect)
+    IsSwitchToSelect = true;
+
+  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI, IsSwitchToSelect))
     return requestResimplify();
 
   if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
diff --git a/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll b/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll
index c8fef79352e75b..fa783283b881f1 100644
--- a/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll
+++ b/llvm/test/Transforms/SimplifyCFG/AMDGPU/switch-to-select.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=simplifycfg < %s | FileCheck -check-prefix=ALL %s
+; RUN: opt -S -passes=simplifycfg -switch-to-select < %s | FileCheck -check-prefix=ALL %s
 
 
 define float @SimpleTestTwoCasesAndDefault(<2 x float> noundef %PerspInterpCenter, i32 inreg noundef %PrimMask) {
@@ -144,7 +144,7 @@ define float @SimpleTestSwitch(<2 x float> noundef %PerspInterpCenter, i32 inreg
 ; ALL-NEXT:    [[TMP4:%.*]] = fptosi float [[TMP3]] to i32
 ; ALL-NEXT:    [[DOTFR:%.*]] = freeze i32 [[TMP4]]
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[DOTFR]], 1
-; ALL-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP16:%.*]]
+; ALL-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]]
 ; ALL:       6:
 ; ALL-NEXT:    [[TMP7:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
 ; ALL-NEXT:    [[TMP8:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP7]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]])
@@ -152,22 +152,17 @@ define float @SimpleTestSwitch(<2 x float> noundef %PerspInterpCenter, i32 inreg
 ; ALL-NEXT:    [[TMP10:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.floor.f32(float [[TMP9]])
 ; ALL-NEXT:    [[TMP11:%.*]] = fptosi float [[TMP10]] to i32
 ; ALL-NEXT:    [[COND_FREEZE1:%.*]] = freeze i32 [[TMP11]]
-; ALL-NEXT:    switch i32 [[COND_FREEZE1]], label [[TMP16]] [
-; ALL-NEXT:      i32 0, label [[TMP12:%.*]]
-; ALL-NEXT:      i32 1, label [[TMP13:%.*]]
-; ALL-NEXT:      i32 2, label [[TMP14:%.*]]
-; ALL-NEXT:      i32 3, label [[TMP15:%.*]]
-; ALL-NEXT:    ]
+; ALL-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 [[COND_FREEZE1]], 0
+; ALL-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 [[COND_FREEZE1]], 1
+; ALL-NEXT:    [[SWITCH_SELECTCMP2:%.*]] = icmp eq i32 [[COND_FREEZE1]], 2
+; ALL-NEXT:    [[SWITCH_SELECTCMP3:%.*]] = icmp eq i32 [[COND_FREEZE1]], 3
+; ALL-NEXT:    [[SWITCHACIM_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP3]], float 4.000000e+00, float 0.000000e+00
+; ALL-NEXT:    [[SWITCHACIM_SELECT4:%.*]] = select i1 [[SWITCH_SELECTCMP2]], float 3.000000e+00, float [[SWITCHACIM_SELECT]]
+; ALL-NEXT:    [[SWITCHACIM_SELECT5:%.*]] = select i1 [[SWITCH_SELECTCMP1]], float 2.000000e+00, float [[SWITCHACIM_SELECT4]]
+; ALL-NEXT:    [[SWITCHACIM_SELECT6:%.*]] = select i1 [[SWITCH_SELECTCMP]], float 1.000000e+00, float [[SWITCHACIM_SELECT5]]
+; ALL-NEXT:    br label [[TMP12]]
 ; ALL:       12:
-; ALL-NEXT:    br label [[TMP16]]
-; ALL:       13:
-; ALL-NEXT:    br label [[TMP16]]
-; ALL:       14:
-; ALL-NEXT:    br label [[TMP16]]
-; ALL:       15:
-; ALL-NEXT:    br label [[TMP16]]
-; ALL:       16:
-; ALL-NEXT:    [[SAMPLEPOS_1_I0:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY:%.*]] ], [ 0.000000e+00, [[TMP6]] ], [ 4.000000e+00, [[TMP15]] ], [ 3.000000e+00, [[TMP14]] ], [ 2.000000e+00, [[TMP13]] ], [ 1.000000e+00, [[TMP12]] ]
+; ALL-NEXT:    [[SAMPLEPOS_1_I0:%.*]] = phi float [ 0.000000e+00, [[DOTENTRY:%.*]] ], [ [[SWITCHACIM_SELECT6]], [[TMP6]] ]
 ; ALL-NEXT:    ret float [[SAMPLEPOS_1_I0]]
 ;
 .entry: