[llvm] [VectorCombine][AMDGPU] Narrow Phi of Shuffles. (PR #140188)

Leon Clark via llvm-commits llvm-commits at lists.llvm.org
Mon May 19 10:30:27 PDT 2025


https://github.com/PeddleSpam updated https://github.com/llvm/llvm-project/pull/140188

>From f331e87229a840f27d863b97c7e7fb69d9d54ead Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Fri, 16 May 2025 05:10:43 +0100
Subject: [PATCH 1/5] [VectorCombine][AMDGPU] Narrow Phi of Shuffles.

---
 .../AMDGPU/narrow-phi-of-shuffles.ll          | 1266 +++++++++++++++++
 1 file changed, 1266 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll

diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
new file mode 100644
index 0000000000000..1b879d53202c0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
@@ -0,0 +1,1266 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i8> @shuffle_v2i8(<2 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i8> @shuffle_v2i8(
+; CHECK-SAME: <2 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x i8> [[ARG0]], <2 x i8> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x i8> [[ARG0]], <2 x i8> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x i8> %arg0, <2 x i8> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x i8> %arg0, <2 x i8> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x i8> %val3
+}
+
+define <4 x i8> @shuffle_v4i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i8> @shuffle_v4i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i8> %val3
+}
+
+define <8 x i8> @shuffle_v8i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i8> @shuffle_v8i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i8> %val3
+}
+
+define <16 x i8> @shuffle_v16i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i8> @shuffle_v16i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x i8> %val3
+}
+
+define <32 x i8> @shuffle_v32i8(<3 x i8> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i8> @shuffle_v32i8(
+; CHECK-SAME: <3 x i8> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i8> [[ARG0]], <3 x i8> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x i8> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x i8> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i8> %arg0, <3 x i8> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x i8> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x i8> %val3
+}
+
+define <2 x i16> @shuffle_v2i16(<2 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i16> @shuffle_v2i16(
+; CHECK-SAME: <2 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x i16> [[ARG0]], <2 x i16> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x i16> [[ARG0]], <2 x i16> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x i16> %arg0, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x i16> %arg0, <2 x i16> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x i16> %val3
+}
+
+define <4 x i16> @shuffle_v4i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i16> @shuffle_v4i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i16> %val3
+}
+
+define <8 x i16> @shuffle_v8i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i16> @shuffle_v8i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i16> %val3
+}
+
+define <16 x i16> @shuffle_v16i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i16> @shuffle_v16i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x i16> %val3
+}
+
+define <32 x i16> @shuffle_v32i16(<3 x i16> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i16> @shuffle_v32i16(
+; CHECK-SAME: <3 x i16> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x i16> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x i16> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i16> %arg0, <3 x i16> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x i16> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x i16> %val3
+}
+
+define <2 x i32> @shuffle_v2i32(<2 x i32> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x i32> @shuffle_v2i32(
+; CHECK-SAME: <2 x i32> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x i32> [[ARG0]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x i32> [[ARG0]], <2 x i32> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x i32> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x i32> %arg0, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x i32> %arg0, <2 x i32> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x i32> %val3
+}
+
+define <4 x i32> @shuffle_v4i32(<3 x i32> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4i32(
+; CHECK-SAME: <3 x i32> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i32> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v8i32(<3 x i32> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x i32> @shuffle_v8i32(
+; CHECK-SAME: <3 x i32> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}
+
+define <16 x i32> @shuffle_v16i32(<3 x i32> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x i32> @shuffle_v16i32(
+; CHECK-SAME: <3 x i32> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x i32> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x i32> %val3
+}
+
+define <32 x i32> @shuffle_v32i32(<3 x i32> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x i32> @shuffle_v32i32(
+; CHECK-SAME: <3 x i32> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x i32> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x i32> %arg0, <3 x i32> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x i32> %val3
+}
+
+define <2 x bfloat> @shuffle_v2bf16(<2 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x bfloat> @shuffle_v2bf16(
+; CHECK-SAME: <2 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x bfloat> [[ARG0]], <2 x bfloat> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x bfloat> [[ARG0]], <2 x bfloat> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x bfloat> %arg0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x bfloat> %arg0, <2 x bfloat> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x bfloat> %val3
+}
+
+define <3 x bfloat> @shuffle_v3bf16(<3 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <3 x bfloat> @shuffle_v3bf16(
+; CHECK-SAME: <3 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <3 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <3 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <3 x i32> <i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <3 x i32> <i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <3 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <3 x bfloat> %val3
+}
+
+define <4 x bfloat> @shuffle_v4bf16(<3 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x bfloat> @shuffle_v4bf16(
+; CHECK-SAME: <3 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x bfloat> %val3
+}
+
+define <6 x bfloat> @shuffle_v6bf16(<3 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <6 x bfloat> @shuffle_v6bf16(
+; CHECK-SAME: <3 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <6 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <6 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <6 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <6 x bfloat> %val3
+}
+
+define <8 x bfloat> @shuffle_v8bf16(<3 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x bfloat> @shuffle_v8bf16(
+; CHECK-SAME: <3 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x bfloat> %val3
+}
+
+define <16 x bfloat> @shuffle_v16bf16(<3 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x bfloat> @shuffle_v16bf16(
+; CHECK-SAME: <3 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x bfloat> %val3
+}
+
+define <32 x bfloat> @shuffle_v32bf16(<3 x bfloat> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x bfloat> @shuffle_v32bf16(
+; CHECK-SAME: <3 x bfloat> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x bfloat> [[ARG0]], <3 x bfloat> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x bfloat> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x bfloat> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x bfloat> %arg0, <3 x bfloat> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x bfloat> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x bfloat> %val3
+}
+
+define <2 x half> @shuffle_v2f16(<2 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x half> @shuffle_v2f16(
+; CHECK-SAME: <2 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x half> [[ARG0]], <2 x half> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x half> [[ARG0]], <2 x half> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x half> %arg0, <2 x half> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x half> %arg0, <2 x half> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x half> %val3
+}
+
+define <3 x half> @shuffle_v3f16(<3 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <3 x half> @shuffle_v3f16(
+; CHECK-SAME: <3 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <3 x i32> <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <3 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <3 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x half> %arg0, <3 x half> poison, <3 x i32> <i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x half> %arg0, <3 x half> poison, <3 x i32> <i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <3 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <3 x half> %val3
+}
+
+define <4 x half> @shuffle_v4f16(<3 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x half> @shuffle_v4f16(
+; CHECK-SAME: <3 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x half> %arg0, <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x half> %arg0, <3 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x half> %val3
+}
+
+define <6 x half> @shuffle_v6f16(<3 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <6 x half> @shuffle_v6f16(
+; CHECK-SAME: <3 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <6 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <6 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x half> %arg0, <3 x half> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x half> %arg0, <3 x half> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <6 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <6 x half> %val3
+}
+
+define <8 x half> @shuffle_v8f16(<3 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x half> @shuffle_v8f16(
+; CHECK-SAME: <3 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x half> %arg0, <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x half> %arg0, <3 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x half> %val3
+}
+
+define <16 x half> @shuffle_v16f16(<3 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x half> @shuffle_v16f16(
+; CHECK-SAME: <3 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x half> %arg0, <3 x half> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x half> %arg0, <3 x half> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x half> %val3
+}
+
+define <32 x half> @shuffle_v32f16(<3 x half> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x half> @shuffle_v32f16(
+; CHECK-SAME: <3 x half> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x half> [[ARG0]], <3 x half> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x half> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x half> %arg0, <3 x half> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x half> %arg0, <3 x half> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x half> %val3
+}
+
+define <2 x float> @shuffle_v2f32(<2 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <2 x float> @shuffle_v2f32(
+; CHECK-SAME: <2 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <2 x float> [[ARG0]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <2 x float> [[ARG0]], <2 x float> poison, <2 x i32> <i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <2 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <2 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <2 x float> %arg0, <2 x float> poison, <2 x i32> <i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <2 x float> %arg0, <2 x float> poison, <2 x i32> <i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <2 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <2 x float> %val3
+}
+
+define <3 x float> @shuffle_v3f32(<3 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <3 x float> @shuffle_v3f32(
+; CHECK-SAME: <3 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <3 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <3 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x float> %arg0, <3 x float> poison, <3 x i32> <i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x float> %arg0, <3 x float> poison, <3 x i32> <i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <3 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <3 x float> %val3
+}
+
+define <4 x float> @shuffle_v4f32(<3 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <4 x float> @shuffle_v4f32(
+; CHECK-SAME: <3 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x float> %arg0, <3 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x float> %arg0, <3 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <4 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x float> %val3
+}
+
+define <6 x float> @shuffle_v6f32(<3 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <6 x float> @shuffle_v6f32(
+; CHECK-SAME: <3 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <6 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <6 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x float> %arg0, <3 x float> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x float> %arg0, <3 x float> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <6 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <6 x float> %val3
+}
+
+define <8 x float> @shuffle_v8f32(<3 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <8 x float> @shuffle_v8f32(
+; CHECK-SAME: <3 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x float> %arg0, <3 x float> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x float> %arg0, <3 x float> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <8 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x float> %val3
+}
+
+define <16 x float> @shuffle_v16f32(<3 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <16 x float> @shuffle_v16f32(
+; CHECK-SAME: <3 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <16 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x float> %arg0, <3 x float> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x float> %arg0, <3 x float> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <16 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <16 x float> %val3
+}
+
+define <32 x float> @shuffle_v32f32(<3 x float> %arg0, i1 %cond) {
+; CHECK-LABEL: define <32 x float> @shuffle_v32f32(
+; CHECK-SAME: <3 x float> [[ARG0:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    tail call void @func0()
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    tail call void @func1()
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <32 x float> [[VAL3]]
+;
+entry:
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <3 x float> %arg0, <3 x float> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  tail call void @func0()
+  br label %finally
+
+else:
+  %val2 = shufflevector <3 x float> %arg0, <3 x float> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  tail call void @func1()
+  br label %finally
+
+finally:
+  %val3 = phi <32 x float> [ %val1, %then ], [ %val2, %else ]
+  ret <32 x float> %val3
+}
+
+declare void @func0() local_unnamed_addr
+
+declare void @func1() local_unnamed_addr

>From 4a6a0f2d5dd86fff69f08f1c5bae062079ff57c2 Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Fri, 16 May 2025 05:50:36 +0100
Subject: [PATCH 2/5] Add VectorCombine transform and update tests.

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 107 ++++++++++++++++++
 .../AMDGPU/narrow-phi-of-shuffles.ll          |  54 ++++-----
 2 files changed, 134 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index c7d221e8d1e5c..fb052998c8790 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -131,6 +131,7 @@ class VectorCombine {
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
   bool foldInterleaveIntrinsics(Instruction &I);
   bool shrinkType(Instruction &I);
+  bool shrinkPhiOfShuffles(Instruction &I);
 
   void replaceValue(Value &Old, Value &New) {
     LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
@@ -3483,6 +3484,109 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
   return true;
 }
 
+// Attempt to narrow a phi of shufflevector instructions where the two incoming
+// values have the same operands but different masks. If the two shuffle masks
+// are offsets of one another we can use one branch to rotate the incoming
+// vector and perform one larger shuffle after the phi.
+bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
+  auto *Phi = dyn_cast<PHINode>(&I);
+  if (!Phi || Phi->getNumIncomingValues() != 2u)
+    return false;
+
+  auto *Shuf0 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(0u));
+  auto *Shuf1 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(1u));
+  if (!Shuf0 || !Shuf1)
+    return false;
+
+  if (!Shuf0->hasOneUse() && !Shuf1->hasOneUse())
+    return false;
+
+  auto *Shuf0Op0 = Shuf0->getOperand(0u);
+  auto *Shuf0Op1 = Shuf0->getOperand(1u);
+  auto *Shuf1Op0 = Shuf1->getOperand(0u);
+  auto *Shuf1Op1 = Shuf1->getOperand(1u);
+
+  auto IsPoison = [](Value *Val) -> bool {
+    return isa<PoisonValue>(Val) || isa<UndefValue>(Val);
+  };
+
+  if (Shuf0Op0 != Shuf1Op0 || !IsPoison(Shuf0Op1) || !IsPoison(Shuf1Op1))
+    return false;
+
+  // Ensure result vectors are wider than the argument vector.
+  auto *InputVT = cast<FixedVectorType>(Shuf0Op0->getType());
+  auto *ResultVT = cast<FixedVectorType>(Shuf0->getType());
+  auto const InputNumElements = InputVT->getNumElements();
+
+  if (InputNumElements >= ResultVT->getNumElements())
+    return false;
+
+  // Take the difference of the two shuffle masks at each index. Ignore poison
+  // values at the same index in both masks.
+  auto Mask0 = Shuf0->getShuffleMask();
+  auto Mask1 = Shuf1->getShuffleMask();
+  auto NewMask0 = std::vector<int>();
+  NewMask0.reserve(Mask0.size());
+
+  for (auto I = 0u; I < Mask0.size(); ++I) {
+    if (Mask0[I] >= 0 && Mask1[I] >= 0)
+      NewMask0.push_back(Mask0[I] - Mask1[I]);
+    else if (Mask0[I] == -1 && Mask1[I] == -1)
+      continue;
+    else
+      return false;
+  }
+
+  if (NewMask0.empty() ||
+      !std::equal(NewMask0.begin() + 1u, NewMask0.end(), NewMask0.begin()))
+    return false;
+
+  // Create new mask using difference of the two incoming masks.
+  auto MaskOffset = NewMask0[0u];
+  if (!Shuf0->hasOneUse()) {
+    std::swap(Shuf0, Shuf1);
+    std::swap(Mask0, Mask1);
+    MaskOffset *= -1;
+  }
+
+  auto Index = (InputNumElements - MaskOffset) % InputNumElements;
+  NewMask0.clear();
+
+  for (auto I = 0u; I < InputNumElements; ++I) {
+    NewMask0.push_back(Index);
+    Index = (Index + 1u) % InputNumElements;
+  }
+
+  // Calculate costs for worst cases and compare.
+  auto const Kind = TTI::SK_PermuteSingleSrc;
+  auto OldCost = std::max(TTI.getShuffleCost(Kind, InputVT, Mask0, CostKind),
+                          TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind));
+  auto NewCost = TTI.getShuffleCost(Kind, InputVT, NewMask0, CostKind) +
+                 TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind);
+
+  if (NewCost > OldCost)
+    return false;
+
+  // Create new shuffles and narrowed phi.
+  auto Builder = IRBuilder(&I);
+  Builder.SetInsertPoint(Shuf0);
+  Builder.SetCurrentDebugLocation(Shuf0->getDebugLoc());
+  auto *NewShuf0 = Builder.CreateShuffleVector(Shuf0Op0, Shuf0Op1, NewMask0);
+
+  Builder.SetInsertPoint(Phi);
+  Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+  auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
+  NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
+  NewPhi->addIncoming(Shuf1Op0, Phi->getIncomingBlock(1u));
+
+  Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
+  auto *NewPoison = PoisonValue::get(NewPhi->getType());
+  auto *NewShuf2 = Builder.CreateShuffleVector(NewPhi, NewPoison, Mask1);
+
+  replaceValue(*Phi, *NewShuf2);
+  return true;
+}
+
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 bool VectorCombine::run() {
@@ -3561,6 +3665,9 @@ bool VectorCombine::run() {
       case Instruction::BitCast:
         MadeChange |= foldBitcastShuffle(I);
         break;
+      case Instruction::PHI:
+        MadeChange |= shrinkPhiOfShuffles(I);
+        break;
       default:
         MadeChange |= shrinkType(I);
         break;
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
index 1b879d53202c0..9b4c4fc9a702d 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/narrow-phi-of-shuffles.ll
@@ -392,15 +392,15 @@ define <4 x i32> @shuffle_v4i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x i32> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[VAL3]]
 ;
 entry:
@@ -427,15 +427,15 @@ define <8 x i32> @shuffle_v8i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x i32> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:
@@ -462,15 +462,15 @@ define <16 x i32> @shuffle_v16i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x i32> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <16 x i32> [[VAL3]]
 ;
 entry:
@@ -497,15 +497,15 @@ define <32 x i32> @shuffle_v32i32(<3 x i32> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x i32> [[ARG0]], <3 x i32> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x i32> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <32 x i32> [[VAL3]]
 ;
 entry:
@@ -1092,15 +1092,15 @@ define <4 x float> @shuffle_v4f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x float> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x float> [[VAL3]]
 ;
 entry:
@@ -1127,15 +1127,15 @@ define <6 x float> @shuffle_v6f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <6 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x float> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <6 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <6 x float> [[VAL3]]
 ;
 entry:
@@ -1162,15 +1162,15 @@ define <8 x float> @shuffle_v8f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x float> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x float> [[VAL3]]
 ;
 entry:
@@ -1197,15 +1197,15 @@ define <16 x float> @shuffle_v16f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <16 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x float> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <16 x float> [[VAL3]]
 ;
 entry:
@@ -1232,15 +1232,15 @@ define <32 x float> @shuffle_v32f32(<3 x float> %arg0, i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; CHECK-NEXT:    tail call void @func0()
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <3 x float> [[ARG0]], <3 x float> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    tail call void @func1()
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <32 x float> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <3 x float> [ [[TMP0]], %[[THEN]] ], [ [[ARG0]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <32 x float> [[VAL3]]
 ;
 entry:

>From c712df55f29ab8a111354c66061dfb7734915aba Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Mon, 19 May 2025 14:34:35 +0100
Subject: [PATCH 3/5] Address review comments.

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 63 ++++++++-----------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fb052998c8790..6eae50d2d28f0 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3498,23 +3498,16 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
   if (!Shuf0 || !Shuf1)
     return false;
 
-  if (!Shuf0->hasOneUse() && !Shuf1->hasOneUse())
-    return false;
-
-  auto *Shuf0Op0 = Shuf0->getOperand(0u);
-  auto *Shuf0Op1 = Shuf0->getOperand(1u);
-  auto *Shuf1Op0 = Shuf1->getOperand(0u);
-  auto *Shuf1Op1 = Shuf1->getOperand(1u);
-
-  auto IsPoison = [](Value *Val) -> bool {
-    return isa<PoisonValue>(Val) || isa<UndefValue>(Val);
-  };
+  Value *Op0 = nullptr;
+  Value *Op1 = nullptr;
 
-  if (Shuf0Op0 != Shuf1Op0 || !IsPoison(Shuf0Op1) || !IsPoison(Shuf1Op1))
+  if (!match(Shuf0, m_OneUse(m_Shuffle(m_Value(Op0), m_Poison()))) ||
+      !match(Shuf1, m_OneUse(m_Shuffle(m_Value(Op1), m_Poison()))) ||
+      Op0 != Op1)
     return false;
 
   // Ensure result vectors are wider than the argument vector.
-  auto *InputVT = cast<FixedVectorType>(Shuf0Op0->getType());
+  auto *InputVT = cast<FixedVectorType>(Op0->getType());
   auto *ResultVT = cast<FixedVectorType>(Shuf0->getType());
   auto const InputNumElements = InputVT->getNumElements();
 
@@ -3523,37 +3516,34 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
 
   // Take the difference of the two shuffle masks at each index. Ignore poison
   // values at the same index in both masks.
-  auto Mask0 = Shuf0->getShuffleMask();
-  auto Mask1 = Shuf1->getShuffleMask();
-  auto NewMask0 = std::vector<int>();
-  NewMask0.reserve(Mask0.size());
+  ArrayRef Mask0 = Shuf0->getShuffleMask();
+  ArrayRef Mask1 = Shuf1->getShuffleMask();
+  SmallVector<int, 16> NewMask;
+  NewMask.reserve(Mask0.size());
 
   for (auto I = 0u; I < Mask0.size(); ++I) {
     if (Mask0[I] >= 0 && Mask1[I] >= 0)
-      NewMask0.push_back(Mask0[I] - Mask1[I]);
+      NewMask.push_back(Mask0[I] - Mask1[I]);
     else if (Mask0[I] == -1 && Mask1[I] == -1)
       continue;
     else
       return false;
   }
 
-  if (NewMask0.empty() ||
-      !std::equal(NewMask0.begin() + 1u, NewMask0.end(), NewMask0.begin()))
+  // Ensure all elements of the new mask are equal. If the difference between
+  // the incoming mask elements is the same, the two must be constant offsets
+  // of one another.
+  if (NewMask.empty() ||
+      !std::equal(NewMask.begin() + 1u, NewMask.end(), NewMask.begin()))
     return false;
 
   // Create new mask using difference of the two incoming masks.
-  auto MaskOffset = NewMask0[0u];
-  if (!Shuf0->hasOneUse()) {
-    std::swap(Shuf0, Shuf1);
-    std::swap(Mask0, Mask1);
-    MaskOffset *= -1;
-  }
-
-  auto Index = (InputNumElements - MaskOffset) % InputNumElements;
-  NewMask0.clear();
+  int MaskOffset = NewMask[0u];
+  unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
+  NewMask.clear();
 
-  for (auto I = 0u; I < InputNumElements; ++I) {
-    NewMask0.push_back(Index);
+  for (unsigned I = 0u; I < InputNumElements; ++I) {
+    NewMask.push_back(Index);
     Index = (Index + 1u) % InputNumElements;
   }
 
@@ -3561,7 +3551,7 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
   auto const Kind = TTI::SK_PermuteSingleSrc;
   auto OldCost = std::max(TTI.getShuffleCost(Kind, InputVT, Mask0, CostKind),
                           TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind));
-  auto NewCost = TTI.getShuffleCost(Kind, InputVT, NewMask0, CostKind) +
+  auto NewCost = TTI.getShuffleCost(Kind, InputVT, NewMask, CostKind) +
                  TTI.getShuffleCost(Kind, InputVT, Mask1, CostKind);
 
   if (NewCost > OldCost)
@@ -3571,17 +3561,18 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
   auto Builder = IRBuilder(&I);
   Builder.SetInsertPoint(Shuf0);
   Builder.SetCurrentDebugLocation(Shuf0->getDebugLoc());
-  auto *NewShuf0 = Builder.CreateShuffleVector(Shuf0Op0, Shuf0Op1, NewMask0);
+  auto *PoisonVal = PoisonValue::get(InputVT);
+  auto *NewShuf0 = Builder.CreateShuffleVector(Op0, PoisonVal, NewMask);
 
   Builder.SetInsertPoint(Phi);
   Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
   auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
   NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
-  NewPhi->addIncoming(Shuf1Op0, Phi->getIncomingBlock(1u));
+  NewPhi->addIncoming(Op1, Phi->getIncomingBlock(1u));
 
   Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
-  auto *NewPoison = PoisonValue::get(NewPhi->getType());
-  auto *NewShuf2 = Builder.CreateShuffleVector(NewPhi, NewPoison, Mask1);
+  PoisonVal = PoisonValue::get(NewPhi->getType());
+  auto *NewShuf2 = Builder.CreateShuffleVector(NewPhi, PoisonVal, Mask1);
 
   replaceValue(*Phi, *NewShuf2);
   return true;

>From 78d6a2e88525dc2a4b5a28745c4dbaddd76c7c53 Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Mon, 19 May 2025 18:25:45 +0100
Subject: [PATCH 4/5] Address review comments.

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 37 +++++++++----------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 6eae50d2d28f0..dca3991691d07 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3493,22 +3493,21 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
   if (!Phi || Phi->getNumIncomingValues() != 2u)
     return false;
 
-  auto *Shuf0 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(0u));
-  auto *Shuf1 = dyn_cast<ShuffleVectorInst>(Phi->getOperand(1u));
-  if (!Shuf0 || !Shuf1)
-    return false;
-
-  Value *Op0 = nullptr;
-  Value *Op1 = nullptr;
+  Value *Op = nullptr;
+  ArrayRef<int> Mask0;
+  ArrayRef<int> Mask1;
 
-  if (!match(Shuf0, m_OneUse(m_Shuffle(m_Value(Op0), m_Poison()))) ||
-      !match(Shuf1, m_OneUse(m_Shuffle(m_Value(Op1), m_Poison()))) ||
-      Op0 != Op1)
+  if (!match(Phi->getOperand(0u), 
+             m_OneUse(m_Shuffle(m_Value(Op), m_Poison(), m_Mask(Mask0)))) ||
+      !match(Phi->getOperand(1u), 
+             m_OneUse(m_Shuffle(m_Specific(Op), m_Poison(), m_Mask(Mask1)))))
     return false;
 
+  auto *Shuf = cast<ShuffleVectorInst>(Phi->getOperand(0u));
+
   // Ensure result vectors are wider than the argument vector.
-  auto *InputVT = cast<FixedVectorType>(Op0->getType());
-  auto *ResultVT = cast<FixedVectorType>(Shuf0->getType());
+  auto *InputVT = cast<FixedVectorType>(Op->getType());
+  auto *ResultVT = cast<FixedVectorType>(Shuf->getType());
   auto const InputNumElements = InputVT->getNumElements();
 
   if (InputNumElements >= ResultVT->getNumElements())
@@ -3516,8 +3515,6 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
 
   // Take the difference of the two shuffle masks at each index. Ignore poison
   // values at the same index in both masks.
-  ArrayRef Mask0 = Shuf0->getShuffleMask();
-  ArrayRef Mask1 = Shuf1->getShuffleMask();
   SmallVector<int, 16> NewMask;
   NewMask.reserve(Mask0.size());
 
@@ -3559,22 +3556,22 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
 
   // Create new shuffles and narrowed phi.
   auto Builder = IRBuilder(&I);
-  Builder.SetInsertPoint(Shuf0);
-  Builder.SetCurrentDebugLocation(Shuf0->getDebugLoc());
+  Builder.SetInsertPoint(Shuf);
+  Builder.SetCurrentDebugLocation(Shuf->getDebugLoc());
   auto *PoisonVal = PoisonValue::get(InputVT);
-  auto *NewShuf0 = Builder.CreateShuffleVector(Op0, PoisonVal, NewMask);
+  auto *NewShuf0 = Builder.CreateShuffleVector(Op, PoisonVal, NewMask);
 
   Builder.SetInsertPoint(Phi);
   Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
   auto *NewPhi = Builder.CreatePHI(NewShuf0->getType(), 2u);
   NewPhi->addIncoming(NewShuf0, Phi->getIncomingBlock(0u));
-  NewPhi->addIncoming(Op1, Phi->getIncomingBlock(1u));
+  NewPhi->addIncoming(Op, Phi->getIncomingBlock(1u));
 
   Builder.SetInsertPoint(*NewPhi->getInsertionPointAfterDef());
   PoisonVal = PoisonValue::get(NewPhi->getType());
-  auto *NewShuf2 = Builder.CreateShuffleVector(NewPhi, PoisonVal, Mask1);
+  auto *NewShuf1 = Builder.CreateShuffleVector(NewPhi, PoisonVal, Mask1);
 
-  replaceValue(*Phi, *NewShuf2);
+  replaceValue(*Phi, *NewShuf1);
   return true;
 }
 

>From e4f333b8b0ed16733fd7f0d5147b44a11383f960 Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Mon, 19 May 2025 18:30:08 +0100
Subject: [PATCH 5/5] Formatting.

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index dca3991691d07..11ccc72614d0d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3497,9 +3497,9 @@ bool VectorCombine::shrinkPhiOfShuffles(Instruction &I) {
   ArrayRef<int> Mask0;
   ArrayRef<int> Mask1;
 
-  if (!match(Phi->getOperand(0u), 
+  if (!match(Phi->getOperand(0u),
              m_OneUse(m_Shuffle(m_Value(Op), m_Poison(), m_Mask(Mask0)))) ||
-      !match(Phi->getOperand(1u), 
+      !match(Phi->getOperand(1u),
              m_OneUse(m_Shuffle(m_Specific(Op), m_Poison(), m_Mask(Mask1)))))
     return false;
 



More information about the llvm-commits mailing list