[clang] [llvm] [AggressiveInstCombine] Shrink loads used in shufflevector rebroadcasts. (PR #128938)

Leon Clark via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 28 09:26:23 PST 2025


https://github.com/PeddleSpam updated https://github.com/llvm/llvm-project/pull/128938

>From f1c09277af268256fce71df9a858959b69385ef1 Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Wed, 26 Feb 2025 15:59:02 +0000
Subject: [PATCH 1/3] [AggressiveInstCombine] Shrink loads used in
 shufflevector rebroadcasts.

Attempt to shrink the size of vector loads where only some of the incoming lanes are used for rebroadcasts in shufflevector instructions.
---
 .../load-shufflevector.ll                     | 345 ++++++++++++++++++
 1 file changed, 345 insertions(+)
 create mode 100644 llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll

diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
new file mode 100644
index 0000000000000..3f6c8334e61cf
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
@@ -0,0 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=aggressive-instcombine -S < %s | FileCheck %s
+
+define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <8 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x half> %val1
+}
+
+define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+  ret <4 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x half> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x half> %val1
+}
+
+define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x half> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x half> %val3
+}
+
+define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x half> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <4 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x half> %val3
+}
+
+define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x half> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x half>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x half> %val0, <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x half> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x half> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i32> %val1
+}
+
+define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+  ret <4 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x i32> %val1
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}
+
+define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <4 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <4 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <4 x i32> %val3
+}
+
+define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly %arg0, i1 %cond) local_unnamed_addr {
+; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
+; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    br label %[[FINALLY:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    br label %[[FINALLY]]
+; CHECK:       [[FINALLY]]:
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
+;
+entry:
+  %val0 = load <4 x i32>, ptr addrspace(1) %arg0, align 32
+  br i1 %cond, label %then, label %else
+
+then:
+  %val1 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  br label %finally
+
+else:
+  %val2 = shufflevector <4 x i32> %val0, <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  br label %finally
+
+finally:
+  %val3 = phi <8 x i32> [ %val1, %then ], [ %val2, %else ]
+  ret <8 x i32> %val3
+}

>From b8ec65331def0fce1f70d203f3473accbdd77865 Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Wed, 26 Feb 2025 21:18:30 +0000
Subject: [PATCH 2/3] Add implementation and update tests.

---
 clang/test/CodeGenOpenCL/preserve_vec3.cl     | 20 ++---
 .../AggressiveInstCombine.cpp                 | 90 +++++++++++++++++++
 .../load-shufflevector.ll                     | 88 +++++++++---------
 3 files changed, 144 insertions(+), 54 deletions(-)

diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl
index 49ebae6fc7013..0538eac4029bb 100644
--- a/clang/test/CodeGenOpenCL/preserve_vec3.cl
+++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl
@@ -11,8 +11,8 @@ typedef float float4 __attribute__((ext_vector_type(4)));
 // CHECK-LABEL: define dso_local spir_kernel void @foo(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META6:![0-9]+]] !kernel_arg_type_qual [[META7:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[EXTRACTVEC1:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC1]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
@@ -23,8 +23,8 @@ void kernel foo(global float3 *a, global float3 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float4_to_float3(
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[A:%.*]], ptr addrspace(1) noundef readonly align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META12:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -35,8 +35,8 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_float4(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META11]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[ASTYPE:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[ASTYPE]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -47,8 +47,8 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @float3_to_double2(
 // CHECK-SAME: ptr addrspace(1) noundef readonly align 16 captures(none) [[A:%.*]], ptr addrspace(1) noundef writeonly align 16 captures(none) initializes((0, 16)) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META13:![0-9]+]] !kernel_arg_base_type [[META14:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x float>, ptr addrspace(1) [[A]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x float> [[LOADVECN]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <3 x float>, ptr addrspace(1) [[A]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <3 x float> [[LOADVECN]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x float> [[TMP0]], ptr addrspace(1) [[B]], align 16, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
@@ -59,8 +59,8 @@ void kernel float3_to_double2(global float3 *a, global double2 *b) {
 // CHECK-LABEL: define dso_local spir_kernel void @char8_to_short3(
 // CHECK-SAME: ptr addrspace(1) noundef writeonly align 8 captures(none) initializes((0, 8)) [[A:%.*]], ptr addrspace(1) noundef readonly align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META16:![0-9]+]] !kernel_arg_type_qual [[META7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
-// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i16>, ptr addrspace(1) [[B]], align 8, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i16> [[TMP0]], <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
 // CHECK-NEXT:    store <4 x i16> [[EXTRACTVEC]], ptr addrspace(1) [[A]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index fe7b3b1676e08..cbdf99316e9e3 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -915,6 +915,95 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
   return true;
 }
 
+// If `I` is a simple fixed-vector load whose only users are shufflevectors of
+// the form `shufflevector I, poison|undef, Mask`, replace it with a load of
+// only the leading lanes those masks actually read and rewrite the shuffles
+// to use the narrower load. Returns true if the IR was changed.
+// Bails out (returns false) for volatile/atomic loads, for types that are not
+// fixed vectors, for any other kind of user, and when the last lane is read.
+//
+// `I` itself is never erased: the NOTE at the call site requires a fold that
+// erases `I` to be the last one in the sequence, and this fold is not last.
+// The old load is left without users instead.
+// NOTE(review): presumably the pass removes dead instructions afterwards;
+// confirm against the end of foldUnusualPatterns.
+static bool shrinkLoadsForBroadcast(Instruction &I) {
+  auto *OldLoad = dyn_cast<LoadInst>(&I);
+  // Narrowing a volatile or atomic access would change observable behaviour.
+  if (!OldLoad || !OldLoad->isSimple())
+    return false;
+
+  auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
+  if (!VecTy)
+    return false;
+  const unsigned OldSize = VecTy->getNumElements();
+
+  // Gather the user shuffles and the number of leading lanes they need.
+  // NewSize stays 0 when no lane of the load is read at all.
+  SmallVector<ShuffleVectorInst *, 4u> Shuffles;
+  unsigned NewSize = 0u;
+  for (Use &U : I.uses()) {
+    // All uses must be ShuffleVector instructions whose second operand is
+    // poison/undef. This also rejects `I` appearing as the second operand.
+    auto *Shuffle = dyn_cast<ShuffleVectorInst>(U.getUser());
+    if (!Shuffle || !isa<PoisonValue, UndefValue>(Shuffle->getOperand(1u)))
+      return false;
+
+    for (int Index : Shuffle->getShuffleMask()) {
+      // Negative entries are poison and entries >= OldSize select from the
+      // poison second operand; neither reads a lane of the load.
+      if (Index < 0 || static_cast<unsigned>(Index) >= OldSize)
+        continue;
+      NewSize = std::max(NewSize, static_cast<unsigned>(Index) + 1u);
+    }
+    Shuffles.push_back(Shuffle);
+  }
+
+  // Bail out if nothing is read (no users, or all-poison masks) or if the
+  // last lane is needed, in which case the load cannot shrink.
+  if (NewSize == 0u || NewSize >= OldSize)
+    return false;
+
+  // Create the narrower load at the same address, right before the old one.
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+  auto *NewVecTy = FixedVectorType::get(VecTy->getElementType(), NewSize);
+
+  // Use the ABI alignment of the new type, but never claim more alignment
+  // than the original load guaranteed: an under-aligned load must stay
+  // under-aligned.
+  const Align NewAlign = std::min(
+      OldLoad->getAlign(), I.getDataLayout().getABITypeAlign(NewVecTy));
+  LoadInst *NewLoad = Builder.CreateAlignedLoad(
+      NewVecTy, OldLoad->getPointerOperand(), NewAlign);
+
+  // NOTE(review): this copies every metadata kind from the wider load;
+  // confirm each one is still valid for the narrower type.
+  NewLoad->copyMetadata(I);
+
+  // Rewrite every user against the narrower load, then drop the old shuffle.
+  for (ShuffleVectorInst *Shuffle : Shuffles) {
+    // Entries that selected from the poison operand would be out of range
+    // for the narrower operands, so turn them into poison mask elements.
+    ArrayRef<int> OldMask = Shuffle->getShuffleMask();
+    SmallVector<int, 16u> NewMask(OldMask.begin(), OldMask.end());
+    for (int &Index : NewMask)
+      if (Index >= static_cast<int>(OldSize))
+        Index = PoisonMaskElem;
+
+    Builder.SetInsertPoint(Shuffle);
+    Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
+    Value *NewShuffle = Builder.CreateShuffleVector(
+        NewLoad, PoisonValue::get(NewVecTy), NewMask);
+
+    // The old shuffle is now unused; erasing it only removes a use of `I`.
+    Shuffle->replaceAllUsesWith(NewShuffle);
+    Shuffle->eraseFromParent();
+  }
+
+  return true;
+}
+
 namespace {
 class StrNCmpInliner {
 public:
@@ -1251,6 +1340,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       MadeChange |= tryToRecognizeTableBasedCttz(I);
       MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
       MadeChange |= foldPatternedLoads(I, DL);
+      MadeChange |= shrinkLoadsForBroadcast(I);
       // NOTE: This function introduces erasing of the instruction `I`, so it
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
diff --git a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
index 3f6c8334e61cf..57006f2c65380 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/load-shufflevector.ll
@@ -5,8 +5,8 @@ define <8 x half> @shuffle_v4_v8f16_r0_1(ptr addrspace(1) nocapture readonly %ar
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <8 x half> [[TMP1]]
 ;
 entry:
@@ -19,8 +19,8 @@ define <8 x half> @shuffle_v4_v8f16_r0_2(ptr addrspace(1) nocapture readonly %ar
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r0_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x half> [[TMP1]]
 ;
 entry:
@@ -33,8 +33,8 @@ define <4 x half> @shuffle_v4_v4f16_r1_2(ptr addrspace(1) nocapture readonly %ar
 ; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x half> [[TMP1]]
 ;
 entry:
@@ -47,8 +47,8 @@ define <8 x half> @shuffle_v4_v8f16_r1_2(ptr addrspace(1) nocapture readonly %ar
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x half> [[TMP1]]
 ;
 entry:
@@ -61,16 +61,16 @@ define <8 x half> @shuffle_v4_v8f16_cond_r0_1(ptr addrspace(1) nocapture readonl
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr addrspace(1) [[ARG0]], align 4
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[TMP0]], <2 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x half> [[VAL3]]
 ;
 entry:
@@ -94,16 +94,16 @@ define <4 x half> @shuffle_v4_v4f16_cond_r1_2(ptr addrspace(1) nocapture readonl
 ; CHECK-LABEL: define <4 x half> @shuffle_v4_v4f16_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x half> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <4 x half> [[VAL3]]
 ;
 entry:
@@ -127,16 +127,16 @@ define <8 x half> @shuffle_v4_v8f16_cond_r1_2(ptr addrspace(1) nocapture readonl
 ; CHECK-LABEL: define <8 x half> @shuffle_v4_v8f16_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x half>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x half>, ptr addrspace(1) [[ARG0]], align 8
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x half> [[VAL0]], <4 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x half> [[TMP0]], <3 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x half> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x half> [[VAL3]]
 ;
 entry:
@@ -160,8 +160,8 @@ define <8 x i32> @shuffle_v4_v8i32_r0_1(ptr addrspace(1) nocapture readonly %arg
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
 entry:
@@ -174,8 +174,8 @@ define <8 x i32> @shuffle_v4_v8i32_r0_2(ptr addrspace(1) nocapture readonly %arg
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r0_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
 entry:
@@ -188,8 +188,8 @@ define <4 x i32> @shuffle_v4_v4i32_r1_2(ptr addrspace(1) nocapture readonly %arg
 ; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
 entry:
@@ -202,8 +202,8 @@ define <8 x i32> @shuffle_v4_v8i32_r1_2(ptr addrspace(1) nocapture readonly %arg
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
 entry:
@@ -216,16 +216,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(ptr addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_1(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[ARG0]], align 8
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:
@@ -249,16 +249,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(ptr addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r0_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:
@@ -282,16 +282,16 @@ define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(ptr addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <4 x i32> @shuffle_v4_v4i32_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <4 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <4 x i32> [[VAL3]]
 ;
 entry:
@@ -315,16 +315,16 @@ define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(ptr addrspace(1) nocapture readonly
 ; CHECK-LABEL: define <8 x i32> @shuffle_v4_v8i32_cond_r1_2(
 ; CHECK-SAME: ptr addrspace(1) readonly captures(none) [[ARG0:%.*]], i1 [[COND:%.*]]) local_unnamed_addr {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[VAL0:%.*]] = load <4 x i32>, ptr addrspace(1) [[ARG0]], align 32
+; CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr addrspace(1) [[ARG0]], align 16
 ; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[VAL1:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label %[[FINALLY:.*]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[VAL2:%.*]] = shufflevector <4 x i32> [[VAL0]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i32> [[TMP0]], <3 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    br label %[[FINALLY]]
 ; CHECK:       [[FINALLY]]:
-; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[VAL1]], %[[THEN]] ], [ [[VAL2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi <8 x i32> [ [[TMP1]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
 ; CHECK-NEXT:    ret <8 x i32> [[VAL3]]
 ;
 entry:

>From 0f9aada48f0cc08726a0e3f79ab78bead7991d3e Mon Sep 17 00:00:00 2001
From: Leon Clark <leoclark at amd.com>
Date: Fri, 28 Feb 2025 17:25:35 +0000
Subject: [PATCH 3/3] Fix broken tests.

---
 .../builtins-systemz-zvector-constrained.c    |  4 +-
 .../SystemZ/builtins-systemz-zvector.c        | 52 +++++++++----------
 .../builtins-systemz-zvector2-constrained.c   | 12 ++---
 .../SystemZ/builtins-systemz-zvector2.c       | 12 ++---
 .../AggressiveInstCombine.cpp                 | 19 ++++---
 5 files changed, 49 insertions(+), 50 deletions(-)

diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c
index 4993df20df143..e335c363ecb48 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c
@@ -79,8 +79,8 @@ void test_core(void) {
   vec_xstd2(vd, idx, ptrd);
 
   vd = vec_splat(vd, 0);
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
-  // CHECK-ASM: vrepg
+  // CHECK: shufflevector <1 x double> %{{.*}}, <1 x double> poison, <2 x i32> zeroinitializer
+  // CHECK-ASM: vlrepg
   vd = vec_splat(vd, 1);
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
   // CHECK-ASM: vrepg
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index d5d15b4dea966..422c97a77511c 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -777,80 +777,80 @@ void test_core(void) {
   // CHECK: <2 x i64> splat (i64 -4503582447501313)
 
   vsc = vec_splat(vsc, 0);
-  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
-  // CHECK-ASM: vrepb
+  // CHECK: shufflevector <1 x i8> %{{.*}}, <1 x i8> poison, <16 x i32> zeroinitializer
+  // CHECK-ASM: vlrepb
   vsc = vec_splat(vsc, 15);
   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
   // CHECK-ASM: vrepb
   vuc = vec_splat(vuc, 0);
-  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
-  // CHECK-ASM: vrepb
+  // CHECK: store volatile <16 x i8> splat (i8 {{.*}}), ptr @vuc
+  // CHECK-ASM: vst
   vuc = vec_splat(vuc, 15);
   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
   // CHECK-ASM: vrepb
   vbc = vec_splat(vbc, 0);
-  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
-  // CHECK-ASM: vrepb
+  // CHECK: shufflevector <1 x i8> %{{.*}}, <1 x i8> poison, <16 x i32> zeroinitializer
+  // CHECK-ASM: vlrepb
   vbc = vec_splat(vbc, 15);
   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
   // CHECK-ASM: vrepb
   vss = vec_splat(vss, 0);
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
-  // CHECK-ASM: vreph
+  // CHECK: shufflevector <1 x i16> %{{.*}}, <1 x i16> poison, <8 x i32> zeroinitializer
+  // CHECK-ASM: vlreph
   vss = vec_splat(vss, 7);
   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   // CHECK-ASM: vreph
   vus = vec_splat(vus, 0);
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
-  // CHECK-ASM: vreph
+  // CHECK: store volatile <8 x i16> splat (i16 {{.*}}), ptr @vus
+  // CHECK-ASM: vst
   vus = vec_splat(vus, 7);
   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   // CHECK-ASM: vreph
   vbs = vec_splat(vbs, 0);
-  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
-  // CHECK-ASM: vreph
+  // CHECK: shufflevector <1 x i16> %{{.*}}, <1 x i16> poison, <8 x i32> zeroinitializer
+  // CHECK-ASM: vlreph
   vbs = vec_splat(vbs, 7);
   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   // CHECK-ASM: vreph
   vsi = vec_splat(vsi, 0);
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
-  // CHECK-ASM: vrepf
+  // CHECK: shufflevector <1 x i32> %{{.*}}, <1 x i32> poison, <4 x i32> zeroinitializer
+  // CHECK-ASM: vlrepf
   vsi = vec_splat(vsi, 3);
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   // CHECK-ASM: vrepf
   vui = vec_splat(vui, 0);
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
-  // CHECK-ASM: vrepf
+  // CHECK: store volatile <4 x i32> splat (i32 {{.*}}), ptr @vui
+  // CHECK-ASM: vst
   vui = vec_splat(vui, 3);
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   // CHECK-ASM: vrepf
   vbi = vec_splat(vbi, 0);
-  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
-  // CHECK-ASM: vrepf
+  // CHECK: shufflevector <1 x i32> %{{.*}}, <1 x i32> poison, <4 x i32> zeroinitializer
+  // CHECK-ASM: vlrepf
   vbi = vec_splat(vbi, 3);
   // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   // CHECK-ASM: vrepf
   vsl = vec_splat(vsl, 0);
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
+  // CHECK: shufflevector <1 x i64> %{{.*}}, <1 x i64> poison, <2 x i32> zeroinitializer
   // CHECK-ASM: vrepg
   vsl = vec_splat(vsl, 1);
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
-  // CHECK-ASM: vrepg
+  // CHECK-ASM: vst
   vul = vec_splat(vul, 0);
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
-  // CHECK-ASM: vrepg
+  // CHECK: store volatile <2 x i64> splat (i64 {{.*}}), ptr @vul
+  // CHECK-ASM: vst
   vul = vec_splat(vul, 1);
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
   // CHECK-ASM: vrepg
   vbl = vec_splat(vbl, 0);
-  // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
-  // CHECK-ASM: vrepg
+  // CHECK: shufflevector <1 x i64> %{{.*}}, <1 x i64> poison, <2 x i32> zeroinitializer
+  // CHECK-ASM: vlrepg
   vbl = vec_splat(vbl, 1);
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
   // CHECK-ASM: vrepg
   vd = vec_splat(vd, 0);
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
-  // CHECK-ASM: vrepg
+  // CHECK: shufflevector <1 x double> %{{.*}}, <1 x double> poison, <2 x i32> zeroinitializer
+  // CHECK-ASM: vlrepg
   vd = vec_splat(vd, 1);
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
   // CHECK-ASM: vrepg
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c
index 25b3e0b68cd02..2b79df2a1886e 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-constrained.c
@@ -130,14 +130,14 @@ void test_core(void) {
   // CHECK-ASM: vst
 
   vf = vec_splat(vf, 0);
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
-  // CHECK-ASM: vrepf
+  // CHECK: shufflevector <1 x float> %{{.*}}, <1 x float> poison, <4 x i32> zeroinitializer
+  // CHECK-ASM: vlrepf
   vf = vec_splat(vf, 1);
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  // CHECK-ASM: vrepf
+  // CHECK: shufflevector <2 x float> %{{.*}}, <2 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // CHECK-ASM: vst
   vd = vec_splat(vd, 0);
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
-  // CHECK-ASM: vrepg
+  // CHECK: shufflevector <1 x double> %{{.*}}, <1 x double> poison, <2 x i32> zeroinitializer
+  // CHECK-ASM: vlrepg
   vd = vec_splat(vd, 1);
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
   // CHECK-ASM: vrepg
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
index c1ef178fcfaa9..1ccbe6df5f16d 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
@@ -254,14 +254,14 @@ void test_core(void) {
   // CHECK-ASM: vstrlr
 
   vf = vec_splat(vf, 0);
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
-  // CHECK-ASM: vrepf
+  // CHECK: shufflevector <1 x float> %{{.*}}, <1 x float> poison, <4 x i32> zeroinitializer
+  // CHECK-ASM: vlrepf
   vf = vec_splat(vf, 1);
-  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  // CHECK-ASM: vrepf
+  // CHECK: shufflevector <2 x float> %{{.*}}, <2 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // CHECK-ASM: vst
   vd = vec_splat(vd, 0);
-  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
-  // CHECK-ASM: vrepg
+  // CHECK: shufflevector <1 x double> %{{.*}}, <1 x double> poison, <2 x i32> zeroinitializer
+  // CHECK-ASM: vlrepg
   vd = vec_splat(vd, 1);
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
   // CHECK-ASM: vrepg
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index cbdf99316e9e3..813935499f86e 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -915,7 +915,7 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
   return true;
 }
 
-// If `I` is a load instruction, used only by shufflevector instructions with 
+// If `I` is a load instruction, used only by shufflevector instructions with
 // poison values, attempt to shrink the load to only the lanes being used.
 static bool shrinkLoadsForBroadcast(Instruction &I) {
   auto *OldLoad = dyn_cast<LoadInst>(&I);
@@ -936,7 +936,7 @@ static bool shrinkLoadsForBroadcast(Instruction &I) {
   using IndexRange = std::pair<unsigned, unsigned>;
   auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
     auto OutputRange = IndexRange(VecTy->getNumElements(), 0u);
-    for (auto &Use: I.uses()) {
+    for (auto &Use : I.uses()) {
       // All uses must be ShuffleVector instructions.
       auto *Shuffle = dyn_cast<ShuffleVectorInst>(Use.getUser());
       if (!Shuffle)
@@ -953,7 +953,7 @@ static bool shrinkLoadsForBroadcast(Instruction &I) {
       auto *Op0Ty = cast<FixedVectorType>(Op0->getType());
       auto NumElems = Op0Ty->getNumElements();
 
-      for (unsigned Index: Mask) {
+      for (unsigned Index : Mask) {
         if (Index < NumElems) {
           OutputRange.first = std::min(Index, OutputRange.first);
           OutputRange.second = std::max(Index, OutputRange.second);
@@ -975,26 +975,25 @@ static bool shrinkLoadsForBroadcast(Instruction &I) {
       auto *ElemTy = VecTy->getElementType();
       auto *NewVecTy = FixedVectorType::get(ElemTy, NewSize);
       auto *NewLoad = cast<LoadInst>(
-        Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand()));
+          Builder.CreateLoad(NewVecTy, OldLoad->getPointerOperand()));
       NewLoad->copyMetadata(I);
 
       // Replace all users.
-      auto OldShuffles = SmallVector<ShuffleVectorInst*, 4u>{};
-      for (auto &Use: I.uses()) {
+      auto OldShuffles = SmallVector<ShuffleVectorInst *, 4u>{};
+      for (auto &Use : I.uses()) {
         auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser());
-        
+
         Builder.SetInsertPoint(Shuffle);
         Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
         auto *NewShuffle = Builder.CreateShuffleVector(
-          NewLoad, PoisonValue::get(NewVecTy), Shuffle->getShuffleMask()
-        );
+            NewLoad, PoisonValue::get(NewVecTy), Shuffle->getShuffleMask());
 
         Shuffle->replaceAllUsesWith(NewShuffle);
         OldShuffles.push_back(Shuffle);
       }
 
       // Erase old users.
-      for (auto *Shuffle: OldShuffles)
+      for (auto *Shuffle : OldShuffles)
         Shuffle->eraseFromParent();
 
       I.eraseFromParent();



More information about the cfe-commits mailing list