[llvm] InstSimplify: lookthru casts, binops in folding shuffles (PR #92668)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Sat May 18 13:07:51 PDT 2024


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/92668

>From 1ffc5fed15bc0f3232f94b4c6321c6025d450388 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Thu, 16 May 2024 15:01:34 +0100
Subject: [PATCH 01/10] InstSimplify: increase shufflevector test coverage

Add examples of patterns that can be simplified, but are currently not.
This patch serves as a pre-commit test for #48960.
---
 .../Transforms/InstSimplify/shufflevector.ll  | 108 +++++++++++++++++-
 1 file changed, 102 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 460e90aa31d91..4ebd012c36653 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -249,13 +249,13 @@ define <8 x i64> @PR30630(<8 x i64> %x) {
 ;         ret <2 x float> zeroinitializer
 define <2 x float> @PR32872(<2 x float> %x) {
 ; CHECK-LABEL: @PR32872(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> zeroinitializer, <4 x float> [[TMP1]], <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    ret <2 x float> [[TMP4]]
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
+; CHECK-NEXT:    [[SHUF2:%.*]] = shufflevector <4 x float> zeroinitializer, <4 x float> [[SHUF]], <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:    ret <2 x float> [[SHUF2]]
 ;
-  %tmp1 = shufflevector <2 x float> %x, <2 x float> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
-  %tmp4 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <2 x i32> <i32 4, i32 5>
-  ret <2 x float> %tmp4
+  %shuf = shufflevector <2 x float> %x, <2 x float> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
+  %shuf2 = shufflevector <4 x float> zeroinitializer, <4 x float> %shuf, <2 x i32> <i32 4, i32 5>
+  ret <2 x float> %shuf2
 }
 
 define <5 x i8> @splat_inserted_constant(<4 x i8> %x) {
@@ -284,3 +284,99 @@ define <2 x i8> @splat_inserted_constant_not_canonical(<3 x i8> %x, <3 x i8> %y)
   %splat2 = shufflevector <3 x i8> %y, <3 x i8> %ins2, <2 x i32> <i32 undef, i32 5>
   ret <2 x i8> %splat2
 }
+
+define <4 x i32> @fold_identity(<4 x i32> %x) {
+; CHECK-LABEL: @fold_identity(
+; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i32> @not_fold_identity(<4 x i32> %x) {
+; CHECK-LABEL: @not_fold_identity(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHUF]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i32> @not_fold_identity2(<4 x i32> %x) {
+; CHECK-LABEL: @not_fold_identity2(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHUF]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i64> @fold_lookthrough_cast(<4 x i32> %x) {
+; CHECK-LABEL: @fold_lookthrough_cast(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %zext = zext <4 x i32> %shuf to <4 x i64>
+  %revshuf = shufflevector <4 x i64> %zext, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i64> %revshuf
+}
+
+define <4 x i32> @fold_lookthrough_binop_same_operands(<4 x i32> %x) {
+; CHECK-LABEL: @fold_lookthrough_binop_same_operands(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %add = add <4 x i32> %shuf, %shuf
+  %revshuf = shufflevector <4 x i32> %add, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i32> @fold_lookthrough_binop_commutative(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @fold_lookthrough_binop_commutative(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[Y:%.*]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %add = add <4 x i32> %shuf, %y
+  %revshuf = shufflevector <4 x i32> %add, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i32> @fold_lookthrough_binop_shift(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @fold_lookthrough_binop_shift(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[SHUF]], [[Y:%.*]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHL]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shl = shl <4 x i32> %shuf, %y
+  %revshuf = shufflevector <4 x i32> %shl, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i1> @fold_lookthrough_binop_i1(<4 x i1> %x, <4 x i1> %y) {
+; CHECK-LABEL: @fold_lookthrough_binop_i1(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i1> [[X:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHL:%.*]] = xor <4 x i1> [[SHUF]], [[Y:%.*]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i1> [[SHL]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i1> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i1> %x, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shl = xor <4 x i1> %shuf, %y
+  %revshuf = shufflevector <4 x i1> %shl, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i1> %revshuf
+}

>From 21e90c0785a2cbfd188716b2d938a0e800c7e8f8 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Thu, 16 May 2024 16:43:35 +0100
Subject: [PATCH 02/10] InstSimplify/test: use poison instead of undef

---
 .../Transforms/InstSimplify/shufflevector.ll  | 60 +++++++++----------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 4ebd012c36653..bb02e2b688d1b 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -289,94 +289,94 @@ define <4 x i32> @fold_identity(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_identity(
 ; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %revshuf
 }
 
 define <4 x i32> @not_fold_identity(<4 x i32> %x) {
 ; CHECK-LABEL: @not_fold_identity(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHUF]], <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHUF]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   ret <4 x i32> %revshuf
 }
 
 define <4 x i32> @not_fold_identity2(<4 x i32> %x) {
 ; CHECK-LABEL: @not_fold_identity2(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHUF]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHUF]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
-  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %revshuf
 }
 
 define <4 x i64> @fold_lookthrough_cast(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_cast(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %zext = zext <4 x i32> %shuf to <4 x i64>
-  %revshuf = shufflevector <4 x i64> %zext, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i64> %zext, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i64> %revshuf
 }
 
 define <4 x i32> @fold_lookthrough_binop_same_operands(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_binop_same_operands(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %add = add <4 x i32> %shuf, %shuf
-  %revshuf = shufflevector <4 x i32> %add, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %add, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %revshuf
 }
 
 define <4 x i32> @fold_lookthrough_binop_commutative(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @fold_lookthrough_binop_commutative(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %add = add <4 x i32> %shuf, %y
-  %revshuf = shufflevector <4 x i32> %add, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %add, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %revshuf
 }
 
 define <4 x i32> @fold_lookthrough_binop_shift(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @fold_lookthrough_binop_shift(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[SHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHL]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHL]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %shl = shl <4 x i32> %shuf, %y
-  %revshuf = shufflevector <4 x i32> %shl, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shl, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %revshuf
 }
 
 define <4 x i1> @fold_lookthrough_binop_i1(<4 x i1> %x, <4 x i1> %y) {
 ; CHECK-LABEL: @fold_lookthrough_binop_i1(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i1> [[X:%.*]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i1> [[X:%.*]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[SHL:%.*]] = xor <4 x i1> [[SHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i1> [[SHL]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i1> [[SHL]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    ret <4 x i1> [[REVSHUF]]
 ;
-  %shuf = shufflevector <4 x i1> %x, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %shuf = shufflevector <4 x i1> %x, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %shl = xor <4 x i1> %shuf, %y
-  %revshuf = shufflevector <4 x i1> %shl, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i1> %shl, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i1> %revshuf
 }

>From d99b5b5e851c8762f40e4d0643f7dd3b85d3bd62 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Fri, 17 May 2024 19:17:38 +0100
Subject: [PATCH 03/10] InstSimplify/shufflevector: add more tests

---
 .../Transforms/InstSimplify/shufflevector.ll  | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index bb02e2b688d1b..c174bcf3b77d6 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -329,6 +329,28 @@ define <4 x i64> @fold_lookthrough_cast(<4 x i32> %x) {
   ret <4 x i64> %revshuf
 }
 
+define <4 x i64> @not_fold_lookthrough_cast(<4 x i32> %x) {
+; CHECK-LABEL: @not_fold_lookthrough_cast(
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[X:%.*]] to <4 x i64>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+;
+  %zext = zext <4 x i32> %x to <4 x i64>
+  %revshuf = shufflevector <4 x i64> %zext, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i64> %revshuf
+}
+
+define i32 @not_fold_lookthrough_bitcast(<4 x i8> %x) {
+; CHECK-LABEL: @not_fold_lookthrough_bitcast(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <4 x i8> [[SHUF]] to i32
+; CHECK-NEXT:    ret i32 [[BITCAST]]
+;
+  %shuf = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %bitcast = bitcast <4 x i8> %shuf to i32
+  ret i32 %bitcast
+}
+
 define <4 x i32> @fold_lookthrough_binop_same_operands(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_binop_same_operands(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -380,3 +402,76 @@ define <4 x i1> @fold_lookthrough_binop_i1(<4 x i1> %x, <4 x i1> %y) {
   %revshuf = shufflevector <4 x i1> %shl, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i1> %revshuf
 }
+
+define <4 x i64> @fold_lookthrough_cast_chain(<4 x i16> %x) {
+; CHECK-LABEL: @fold_lookthrough_cast_chain(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i16> [[X:%.*]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i16> [[SHUF]] to <4 x i32>
+; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i32> [[ZEXT]] to <4 x i64>
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[SEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %zext = zext <4 x i16> %shuf to <4 x i32>
+  %sext = sext <4 x i32> %zext to <4 x i64>
+  %revshuf = shufflevector <4 x i64> %sext, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i64> %revshuf
+}
+
+define <4 x i32> @fold_lookthrough_binop_chain(<4 x i32> %x) {
+; CHECK-LABEL: @fold_lookthrough_binop_chain(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i32> [[ADD]], [[ADD]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %add = add <4 x i32> %shuf, %shuf
+  %add2 = add <4 x i32> %add, %add
+  %revshuf = shufflevector <4 x i32> %add2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i64> @fold_lookthrough_cast_binop_chain(<4 x i32> %x) {
+; CHECK-LABEL: @fold_lookthrough_cast_binop_chain(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[ZEXT]], [[ZEXT]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ADD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %zext = zext <4 x i32> %shuf to <4 x i64>
+  %add = add <4 x i64> %zext, %zext
+  %revshuf = shufflevector <4 x i64> %add, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i64> %revshuf
+}
+
+define <4 x i64> @not_fold_cast_mismatched_types(<4 x i32> %x) {
+; CHECK-LABEL: @not_fold_cast_mismatched_types(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i32> [[SHUF]] to <2 x i64>
+; CHECK-NEXT:    [[EXTSHUF:%.*]] = shufflevector <2 x i64> [[ZEXT]], <2 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    ret <4 x i64> [[EXTSHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+  %zext = zext <2 x i32> %shuf to <2 x i64>
+  %extshuf = shufflevector <2 x i64> %zext, <2 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x i64> %extshuf
+}
+
+define <4 x float> @not_fold_binop_mismatched_types(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @not_fold_binop_mismatched_types(
+; CHECK-NEXT:    [[SHUF_X:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[SHUF_Y:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[FADD:%.*]] = fadd fast <2 x float> [[SHUF_X]], [[SHUF_Y]]
+; CHECK-NEXT:    [[EXTSHUF:%.*]] = shufflevector <2 x float> [[FADD]], <2 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[EXTSHUF]]
+;
+  %shuf.x = shufflevector <4 x float> %x, <4 x float> poison, <2 x i32> <i32 0, i32 2>
+  %shuf.y = shufflevector <4 x float> %y, <4 x float> poison, <2 x i32> <i32 1, i32 3>
+  %fadd = fadd fast <2 x float> %shuf.x, %shuf.y
+  %extshuf = shufflevector <2 x float> %fadd, <2 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x float> %extshuf
+}

>From bc789251aeeac220180272af627c1d5b1ccb063a Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sat, 18 May 2024 10:09:48 +0100
Subject: [PATCH 04/10] InstSimplify/shufflevector: remove redundant tests

---
 .../Transforms/InstSimplify/shufflevector.ll  | 30 ++-----------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index c174bcf3b77d6..da46262f97072 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -364,8 +364,8 @@ define <4 x i32> @fold_lookthrough_binop_same_operands(<4 x i32> %x) {
   ret <4 x i32> %revshuf
 }
 
-define <4 x i32> @fold_lookthrough_binop_commutative(<4 x i32> %x, <4 x i32> %y) {
-; CHECK-LABEL: @fold_lookthrough_binop_commutative(
+define <4 x i32> @fold_lookthrough_binop_different_operands(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @fold_lookthrough_binop_different_operands(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[Y:%.*]]
 ; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -377,32 +377,6 @@ define <4 x i32> @fold_lookthrough_binop_commutative(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %revshuf
 }
 
-define <4 x i32> @fold_lookthrough_binop_shift(<4 x i32> %x, <4 x i32> %y) {
-; CHECK-LABEL: @fold_lookthrough_binop_shift(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[SHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[SHL]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
-;
-  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %shl = shl <4 x i32> %shuf, %y
-  %revshuf = shufflevector <4 x i32> %shl, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x i32> %revshuf
-}
-
-define <4 x i1> @fold_lookthrough_binop_i1(<4 x i1> %x, <4 x i1> %y) {
-; CHECK-LABEL: @fold_lookthrough_binop_i1(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i1> [[X:%.*]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[SHL:%.*]] = xor <4 x i1> [[SHUF]], [[Y:%.*]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i1> [[SHL]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i1> [[REVSHUF]]
-;
-  %shuf = shufflevector <4 x i1> %x, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  %shl = xor <4 x i1> %shuf, %y
-  %revshuf = shufflevector <4 x i1> %shl, <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x i1> %revshuf
-}
-
 define <4 x i64> @fold_lookthrough_cast_chain(<4 x i16> %x) {
 ; CHECK-LABEL: @fold_lookthrough_cast_chain(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i16> [[X:%.*]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

>From 36d7f84d9c3d640d320215db9af8b6e7ea58f872 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sat, 18 May 2024 11:48:24 +0100
Subject: [PATCH 05/10] InstSimplify/shufflevector: add multiuse test

---
 .../test/Transforms/InstSimplify/shufflevector.ll | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index da46262f97072..16bb8bec1b7ca 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -377,6 +377,21 @@ define <4 x i32> @fold_lookthrough_binop_different_operands(<4 x i32> %x, <4 x i
   ret <4 x i32> %revshuf
 }
 
+define <4 x i32> @fold_lookthrough_binop_multiuse(<4 x i32> %x) {
+; CHECK-LABEL: @fold_lookthrough_binop_multiuse(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
+; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i32> [[SHUF]], [[REVSHUF]]
+; CHECK-NEXT:    ret <4 x i32> [[ADD2]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %add = add <4 x i32> %shuf, %shuf
+  %revshuf = shufflevector <4 x i32> %add, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %add2 = add <4 x i32> %shuf, %revshuf
+  ret <4 x i32> %add2
+}
+
 define <4 x i64> @fold_lookthrough_cast_chain(<4 x i16> %x) {
 ; CHECK-LABEL: @fold_lookthrough_cast_chain(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i16> [[X:%.*]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

>From 2fecbeb6f39815db95c6c890b453ab51e025096c Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sat, 18 May 2024 14:27:27 +0100
Subject: [PATCH 06/10] InstSimplify/shufflevector: add negative bitcast test

---
 llvm/test/Transforms/InstSimplify/shufflevector.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 16bb8bec1b7ca..1eae1fbd18206 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -351,6 +351,17 @@ define i32 @not_fold_lookthrough_bitcast(<4 x i8> %x) {
   ret i32 %bitcast
 }
 
+define <8 x i16> @not_fold_lookthrough_bitcast2(<4 x i32> %x, <8 x i16> %y) {
+; CHECK-LABEL: @not_fold_lookthrough_bitcast2(
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <4 x i32> [[X:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[OUT:%.*]] = shufflevector <8 x i16> [[Y:%.*]], <8 x i16> [[CAST]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+; CHECK-NEXT:    ret <8 x i16> [[OUT]]
+;
+  %cast = bitcast <4 x i32> %x to <8 x i16>
+  %out = shufflevector <8 x i16> %y, <8 x i16> %cast, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x i16> %out
+}
+
 define <4 x i32> @fold_lookthrough_binop_same_operands(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_binop_same_operands(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

>From cedcc81670feaeac25eaa03fa900191083fde6e0 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sat, 18 May 2024 15:17:53 +0100
Subject: [PATCH 07/10] InstSimplify/shufflevector: add negative cast test

---
 llvm/test/Transforms/InstSimplify/shufflevector.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 1eae1fbd18206..6940c999c0451 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -340,6 +340,17 @@ define <4 x i64> @not_fold_lookthrough_cast(<4 x i32> %x) {
   ret <4 x i64> %revshuf
 }
 
+define <4 x i64> @not_fold_lookthrough_cast2(<4 x i32> %x) {
+; CHECK-LABEL: @not_fold_lookthrough_cast2(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
+; CHECK-NEXT:    ret <4 x i64> [[ZEXT]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %zext = zext <4 x i32> %shuf to <4 x i64>
+  ret <4 x i64> %zext
+}
+
 define i32 @not_fold_lookthrough_bitcast(<4 x i8> %x) {
 ; CHECK-LABEL: @not_fold_lookthrough_bitcast(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

>From c1e7335566c7fba7c18ca637d7d378c6755011d2 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sat, 18 May 2024 18:40:10 +0100
Subject: [PATCH 08/10] InstSimplify/shufflevector: add identity tests

---
 .../Transforms/InstSimplify/shufflevector.ll  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 6940c999c0451..64087194b0d17 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -294,6 +294,28 @@ define <4 x i32> @fold_identity(<4 x i32> %x) {
   ret <4 x i32> %revshuf
 }
 
+define <4 x i32> @fold_identity2(<4 x i32> %x) {
+; CHECK-LABEL: @fold_identity2(
+; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[SHL]]
+;
+  %shl = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  %shuf = shufflevector <4 x i32> %shl, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
+define <4 x i32> @fold_identity3(<4 x i32> %x) {
+; CHECK-LABEL: @fold_identity3(
+; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[X]]
+; CHECK-NEXT:    ret <4 x i32> [[SHL]]
+;
+  %shl = shl <4 x i32> %x, %x
+  %shuf = shufflevector <4 x i32> %shl, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %revshuf = shufflevector <4 x i32> %shuf, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %revshuf
+}
+
 define <4 x i32> @not_fold_identity(<4 x i32> %x) {
 ; CHECK-LABEL: @not_fold_identity(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>

>From a13fb298c2f82f881dc60369e23c95efe1020808 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Fri, 17 May 2024 18:37:41 +0100
Subject: [PATCH 09/10] InstSimplify: lookthru casts, binops in folding
 shuffles

foldIdentityShuffles() currently suffers from the deficiency that it
cannot look through casts or binary operators, and can only fold
sequences of shuffles without any intervening instructions. Fix this
deficiency by looking through chains of casts that preserve the number
of elements in the vector, and chains of binary operators with identical
operands. The way we do this is by saving the last instruction in the
reduction chain as the RootVec, and rewriting the operands of the first
instruction in the chain.

This patch also has a requirement on increasing the MaxRecursionLimit,
and the AMDGPU and LoopVectorize test changes are due to that.

Future work could simplify the case of binary operands with different
operands, and the multi-use case.

Alive2 proofs follow.

  @fold_lookthrough_cast:
    https://alive2.llvm.org/ce/z/YFKQGX
  @fold_lookthrough_binop_same_operands:
    https://alive2.llvm.org/ce/z/gf3WgB
  @fold_lookthrough_cast_chain:
    https://alive2.llvm.org/ce/z/MT9ApH
  @fold_lookthrough_binop_chain:
    https://alive2.llvm.org/ce/z/WV8HRn
  @fold_lookthrough_cast_binop_chain:
    https://alive2.llvm.org/ce/z/CXj-hh
---
 llvm/lib/Analysis/InstructionSimplify.cpp     | 109 ++++++++++++++----
 .../CodeGen/AMDGPU/select-constant-cttz.ll    |  21 +---
 .../Transforms/InstSimplify/shufflevector.ll  |  25 ++--
 .../LoopVectorize/reduction-inloop.ll         |  57 +++++----
 .../LoopVectorize/scev-predicate-reasoning.ll |  35 +++---
 5 files changed, 141 insertions(+), 106 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 37a7259a5cd02..5f90ce8a02631 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -48,7 +48,7 @@ using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "instsimplify"
 
-enum { RecursionLimit = 3 };
+static unsigned RecursionLimit = 5;
 
 STATISTIC(NumExpand, "Number of expansions");
 STATISTIC(NumReassoc, "Number of reassociations");
@@ -5315,55 +5315,104 @@ Value *llvm::simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
   return ::simplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
 }
 
+using ReplacementTy = std::optional<std::pair<Value *, Value *>>;
+
 /// For the given destination element of a shuffle, peek through shuffles to
 /// match a root vector source operand that contains that element in the same
 /// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
-static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
-                                   int MaskVal, Value *RootVec,
-                                   unsigned MaxRecurse) {
+static std::pair<Value *, ReplacementTy>
+foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, int MaskVal,
+                     Value *RootVec, unsigned MaxRecurse) {
   if (!MaxRecurse--)
-    return nullptr;
+    return {nullptr, std::nullopt};
 
   // Bail out if any mask value is undefined. That kind of shuffle may be
   // simplified further based on demanded bits or other folds.
   if (MaskVal == -1)
-    return nullptr;
+    return {nullptr, std::nullopt};
 
   // The mask value chooses which source operand we need to look at next.
-  int InVecNumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
+  unsigned InVecNumElts =
+      cast<FixedVectorType>(Op0->getType())->getNumElements();
   int RootElt = MaskVal;
   Value *SourceOp = Op0;
-  if (MaskVal >= InVecNumElts) {
+  if (MaskVal > -1 && static_cast<unsigned>(MaskVal) >= InVecNumElts) {
     RootElt = MaskVal - InVecNumElts;
     SourceOp = Op1;
   }
 
+  // The next RootVec preseves an existing RootVec for casts and binops, so that
+  // the final RootVec is the last cast or binop in the chain.
+  Value *NextRootVec = RootVec ? RootVec : SourceOp;
+
+  // Look through a cast instruction that preserves number of elements in the
+  // vector. Set the RootVec to the cast, the SourceOp to the operand and
+  // recurse. If, in a later stack frame, an appropriate ShuffleVector is
+  // matched, the example will reduce.
+  if (auto *SourceCast = dyn_cast<CastInst>(SourceOp))
+    if (auto *CastTy = dyn_cast<FixedVectorType>(SourceCast->getSrcTy()))
+      if (CastTy->getNumElements() == InVecNumElts)
+        return foldIdentityShuffles(DestElt, SourceCast->getOperand(0), Op1,
+                                    MaskVal, NextRootVec, MaxRecurse);
+
+  // Look through a binary operator, with two identical operands. Set the
+  // RootVec to the binop, the SourceOp to the operand, and recurse. If, in a
+  // later stack frame, an appropriate ShuffleVector is matched, the example
+  // will reduce.
+  Value *BinOpLHS = nullptr, *BinOpRHS = nullptr;
+  if (match(SourceOp, m_BinOp(m_Value(BinOpLHS), m_Value(BinOpRHS))) &&
+      BinOpLHS == BinOpRHS)
+    return foldIdentityShuffles(DestElt, BinOpLHS, Op1, MaskVal, NextRootVec,
+                                MaxRecurse);
+
   // If the source operand is a shuffle itself, look through it to find the
   // matching root vector.
   if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
+    // Here, we use RootVec, because there is no requirement for finding the
+    // last shuffle in a chain. In fact, the zeroth operand of the first shuffle
+    // in the chain will be used as the RootVec for folding.
     return foldIdentityShuffles(
         DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
         SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse);
   }
 
-  // TODO: Look through bitcasts? What if the bitcast changes the vector element
-  // size?
-
-  // The source operand is not a shuffle. Initialize the root vector value for
-  // this shuffle if that has not been done yet.
-  if (!RootVec)
-    RootVec = SourceOp;
-
-  // Give up as soon as a source operand does not match the existing root value.
-  if (RootVec != SourceOp)
-    return nullptr;
-
   // The element must be coming from the same lane in the source vector
   // (although it may have crossed lanes in intermediate shuffles).
   if (RootElt != DestElt)
-    return nullptr;
+    return {nullptr, std::nullopt};
+
+  // If NextRootVec is equal to SourceOp, no replacements are required. Just
+  // return NextRootVec as the leaf value of the recursion.
+  if (NextRootVec == SourceOp)
+    return {NextRootVec, std::nullopt};
+
+  // We again have to match the condition for a vector-num-element-preserving
+  // cast or binop with equal operands, as we are not assured of the recursion
+  // happening from the call after the previous match. The RootVec was set to
+  // the last cast or binop in the chain.
+  if ((isa<CastInst>(RootVec) &&
+       isa<FixedVectorType>(cast<CastInst>(RootVec)->getSrcTy()) &&
+       cast<FixedVectorType>(cast<CastInst>(RootVec)->getSrcTy())
+               ->getNumElements() == InVecNumElts) ||
+      (match(RootVec, m_BinOp(m_Value(BinOpLHS), m_Value(BinOpRHS))) &&
+       BinOpLHS == BinOpRHS))
+    // ReplacementSrc should be the User of the the first cast or binop in the
+    // chain. SourceOp is the reduced value, which should replace
+    // ReplacementSrc.
+    if (auto *ReplacementSrc = SourceOp->getUniqueUndroppableUser()) {
+      // If ReplacementSrc equals RootVec, it means that we didn't recurse after
+      // matching a cast or binop, and we should terminate the recursion and
+      // return the leaf value here.
+      if (ReplacementSrc == RootVec)
+        return {RootVec, std::nullopt};
+      // The User of ReplacementSrc is the first cast or binop in the chain.
+      // There could be other Users, but we constrain it to a unique User, since
+      // we perform RAUW later.
+      if (ReplacementSrc->hasOneUser())
+        return {RootVec, std::make_pair(ReplacementSrc, SourceOp)};
+    }
 
-  return RootVec;
+  return {nullptr, std::nullopt};
 }
 
 static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
@@ -5468,16 +5517,26 @@ static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
   // shuffle. This handles simple identity shuffles as well as chains of
   // shuffles that may widen/narrow and/or move elements across lanes and back.
   Value *RootVec = nullptr;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
+  ReplacementTy ReplaceInRootVec;
+  for (unsigned Idx = 0; Idx != MaskNumElts; ++Idx) {
     // Note that recursion is limited for each vector element, so if any element
     // exceeds the limit, this will fail to simplify.
-    RootVec =
-        foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse);
+    std::pair<Value *, ReplacementTy> Res =
+        foldIdentityShuffles(Idx, Op0, Op1, Indices[Idx], RootVec, MaxRecurse);
+    RootVec = Res.first;
+    ReplaceInRootVec = Res.second;
 
     // We can't replace a widening/narrowing shuffle with one of its operands.
     if (!RootVec || RootVec->getType() != RetTy)
       return nullptr;
   }
+
+  // Apply any replacements in RootVec that are applicable in case we're looking
+  // through a cast or binop.
+  if (ReplaceInRootVec) {
+    auto [ReplacementSrc, ReplacementDst] = *ReplaceInRootVec;
+    ReplacementSrc->replaceAllUsesWith(ReplacementDst);
+  }
   return RootVec;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
index d807c3909e656..5f36531badbf1 100644
--- a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
@@ -6,26 +6,11 @@ declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable
 define amdgpu_kernel void @select_constant_cttz(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
 ; GCN-LABEL: select_constant_cttz:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_load_dword s2, s[2:3], 0x0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_lshr_b32 s4, 1, s2
-; GCN-NEXT:    s_cmp_lg_u32 s2, 0
-; GCN-NEXT:    s_ff1_i32_b32 s2, s4
-; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
-; GCN-NEXT:    s_and_b64 s[6:7], s[4:5], exec
-; GCN-NEXT:    s_cselect_b32 s2, -1, s2
-; GCN-NEXT:    s_flbit_i32 s6, s2
-; GCN-NEXT:    s_sub_i32 s8, 31, s6
-; GCN-NEXT:    s_cmp_eq_u32 s2, 0
-; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
-; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
-; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
-; GCN-NEXT:    s_cselect_b32 s4, -1, s8
 ; GCN-NEXT:    s_mov_b32 s2, -1
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    v_mov_b32_e32 v0, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
   %v    = load i32, ptr addrspace(1) %arrayidx, align 4
diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 64087194b0d17..e17e69a617651 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -341,9 +341,8 @@ define <4 x i32> @not_fold_identity2(<4 x i32> %x) {
 define <4 x i64> @fold_lookthrough_cast(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_cast(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ZEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[X]] to <4 x i64>
+; CHECK-NEXT:    ret <4 x i64> [[ZEXT]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %zext = zext <4 x i32> %shuf to <4 x i64>
@@ -398,9 +397,8 @@ define <8 x i16> @not_fold_lookthrough_bitcast2(<4 x i32> %x, <8 x i16> %y) {
 define <4 x i32> @fold_lookthrough_binop_same_operands(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_binop_same_operands(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[X]], [[X]]
+; CHECK-NEXT:    ret <4 x i32> [[ADD]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %add = add <4 x i32> %shuf, %shuf
@@ -439,10 +437,9 @@ define <4 x i32> @fold_lookthrough_binop_multiuse(<4 x i32> %x) {
 define <4 x i64> @fold_lookthrough_cast_chain(<4 x i16> %x) {
 ; CHECK-LABEL: @fold_lookthrough_cast_chain(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i16> [[X:%.*]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i16> [[SHUF]] to <4 x i32>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i16> [[X]] to <4 x i32>
 ; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i32> [[ZEXT]] to <4 x i64>
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[SEXT]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+; CHECK-NEXT:    ret <4 x i64> [[SEXT]]
 ;
   %shuf = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %zext = zext <4 x i16> %shuf to <4 x i32>
@@ -454,10 +451,9 @@ define <4 x i64> @fold_lookthrough_cast_chain(<4 x i16> %x) {
 define <4 x i32> @fold_lookthrough_binop_chain(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_binop_chain(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[SHUF]], [[SHUF]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[X]], [[X]]
 ; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i32> [[ADD]], [[ADD]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i32> [[ADD2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i32> [[REVSHUF]]
+; CHECK-NEXT:    ret <4 x i32> [[ADD2]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %add = add <4 x i32> %shuf, %shuf
@@ -469,10 +465,9 @@ define <4 x i32> @fold_lookthrough_binop_chain(<4 x i32> %x) {
 define <4 x i64> @fold_lookthrough_cast_binop_chain(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_lookthrough_cast_binop_chain(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[SHUF]] to <4 x i64>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <4 x i32> [[X]] to <4 x i64>
 ; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i64> [[ZEXT]], [[ZEXT]]
-; CHECK-NEXT:    [[REVSHUF:%.*]] = shufflevector <4 x i64> [[ADD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i64> [[REVSHUF]]
+; CHECK-NEXT:    ret <4 x i64> [[ADD]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %zext = zext <4 x i32> %shuf to <4 x i64>
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index e6936b19415d0..5fca5b0c41ef8 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1202,45 +1202,42 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
-; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], <i8 31, i8 31, i8 31, i8 31>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], <i8 3, i8 3, i8 3, i8 3>
 ; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], <i8 31, i8 31, i8 31, i8 31>
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
-; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
-; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
-; CHECK-NEXT:    [[TMP11]] = add i32 [[TMP10]], [[TMP8]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[I]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
 ; CHECK:       for.body2:
 ; CHECK-NEXT:    [[A_117:%.*]] = phi i32 [ [[INC6:%.*]], [[FOR_INC5:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[G_016:%.*]] = phi i32 [ [[G_1:%.*]], [[FOR_INC5]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[A_117]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[H]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[TOBOOL3_NOT:%.*]] = icmp eq i8 [[TMP14]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[A_117]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[H]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[TOBOOL3_NOT:%.*]] = icmp eq i8 [[TMP12]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL3_NOT]], label [[FOR_INC5]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP13:%.*]] = udiv i8 [[TMP12]], 31
+; CHECK-NEXT:    [[TMP14:%.*]] = shl nuw nsw i8 [[TMP13]], 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = udiv i8 [[TMP14]], 31
-; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw i8 [[TMP15]], 3
-; CHECK-NEXT:    [[TMP17:%.*]] = udiv i8 [[TMP16]], 31
-; CHECK-NEXT:    [[TMP18:%.*]] = shl nuw nsw i8 [[TMP17]], 1
-; CHECK-NEXT:    [[REASS_ADD:%.*]] = zext nneg i8 [[TMP18]] to i32
+; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw i8 [[TMP15]], 1
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = zext nneg i8 [[TMP16]] to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[G_016]], [[REASS_ADD]]
 ; CHECK-NEXT:    br label [[FOR_INC5]]
 ; CHECK:       for.inc5:
@@ -1249,7 +1246,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP41:![0-9]+]]
 ; CHECK:       for.end7:
-; CHECK-NEXT:    [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[G_1_LCSSA]]
 ;
 entry:
@@ -1292,7 +1289,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = or disjoint i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i32 [[INDEX]], 3
@@ -1354,21 +1351,21 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer
-; CHECK-NEXT:    [[TMP46:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP46]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP44]]
-; CHECK-NEXT:    [[TMP48:%.*]] = bitcast <4 x i1> [[TMP47]] to i4
-; CHECK-NEXT:    [[TMP49:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP48]])
-; CHECK-NEXT:    [[TMP50:%.*]] = zext nneg i4 [[TMP49]] to i32
-; CHECK-NEXT:    [[TMP51]] = add i32 [[VEC_PHI]], [[TMP50]]
+; CHECK-NEXT:    [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[DOTNOT7:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP44]]
+; CHECK-NEXT:    [[TMP45:%.*]] = bitcast <4 x i1> [[DOTNOT7]] to i4
+; CHECK-NEXT:    [[TMP46:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP45]])
+; CHECK-NEXT:    [[TMP47:%.*]] = zext nneg i4 [[TMP46]] to i32
+; CHECK-NEXT:    [[TMP48]] = add i32 [[VEC_PHI]], [[TMP47]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[A_1_LCSSA]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[LOR_LHS_FALSE:%.*]], label [[IF_THEN:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 6ae6645378b32..6b983cd9eef3e 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -19,8 +19,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP1]], i1 [[TMP4]], i1 false
 ; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[ADD]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; CHECK-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[ADD]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -28,24 +27,24 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i32> [[TMP9]] to <4 x i64>
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i64> [[TMP10]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i64> [[TMP10]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP12]], align 8
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP14]], align 8
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP16]], align 8
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP18]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
+; CHECK-NEXT:    [[TMP9:%.*]] = zext <4 x i32> [[TMP8]] to <4 x i64>
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP16]]
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP11]], align 8
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP13]], align 8
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP15]], align 8
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP17]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:

>From 041187d44299f33f34662eeb0effb73112139517 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Sat, 18 May 2024 21:03:50 +0100
Subject: [PATCH 10/10] InstSimplify: address review; minor fixes

---
 llvm/lib/Analysis/InstructionSimplify.cpp | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5f90ce8a02631..5e6570d5a6ee4 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5349,11 +5349,12 @@ foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, int MaskVal,
   // vector. Set the RootVec to the cast, the SourceOp to the operand and
   // recurse. If, in a later stack frame, an appropriate ShuffleVector is
   // matched, the example will reduce.
-  if (auto *SourceCast = dyn_cast<CastInst>(SourceOp))
+  if (auto *SourceCast = dyn_cast<CastInst>(SourceOp)) {
     if (auto *CastTy = dyn_cast<FixedVectorType>(SourceCast->getSrcTy()))
       if (CastTy->getNumElements() == InVecNumElts)
         return foldIdentityShuffles(DestElt, SourceCast->getOperand(0), Op1,
                                     MaskVal, NextRootVec, MaxRecurse);
+  }
 
   // Look through a binary operator, with two identical operands. Set the
   // RootVec to the binop, the SourceOp to the operand, and recurse. If, in a
@@ -5386,16 +5387,17 @@ foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, int MaskVal,
   if (NextRootVec == SourceOp)
     return {NextRootVec, std::nullopt};
 
+  auto *RootCast = dyn_cast<CastInst>(RootVec);
+  auto *CastTy =
+      RootCast ? dyn_cast<FixedVectorType>(RootCast->getSrcTy()) : nullptr;
+
   // We again have to match the condition for a vector-num-element-preserving
   // cast or binop with equal operands, as we are not assured of the recursion
   // happening from the call after the previous match. The RootVec was set to
   // the last cast or binop in the chain.
-  if ((isa<CastInst>(RootVec) &&
-       isa<FixedVectorType>(cast<CastInst>(RootVec)->getSrcTy()) &&
-       cast<FixedVectorType>(cast<CastInst>(RootVec)->getSrcTy())
-               ->getNumElements() == InVecNumElts) ||
+  if ((CastTy && CastTy->getNumElements() == InVecNumElts) ||
       (match(RootVec, m_BinOp(m_Value(BinOpLHS), m_Value(BinOpRHS))) &&
-       BinOpLHS == BinOpRHS))
+       BinOpLHS == BinOpRHS)) {
     // ReplacementSrc should be the User of the the first cast or binop in the
     // chain. SourceOp is the reduced value, which should replace
     // ReplacementSrc.
@@ -5411,6 +5413,7 @@ foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, int MaskVal,
       if (ReplacementSrc->hasOneUser())
         return {RootVec, std::make_pair(ReplacementSrc, SourceOp)};
     }
+  }
 
   return {nullptr, std::nullopt};
 }
@@ -5517,14 +5520,14 @@ static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
   // shuffle. This handles simple identity shuffles as well as chains of
   // shuffles that may widen/narrow and/or move elements across lanes and back.
   Value *RootVec = nullptr;
-  ReplacementTy ReplaceInRootVec;
+  ReplacementTy ReplacementCandidate;
   for (unsigned Idx = 0; Idx != MaskNumElts; ++Idx) {
     // Note that recursion is limited for each vector element, so if any element
     // exceeds the limit, this will fail to simplify.
     std::pair<Value *, ReplacementTy> Res =
         foldIdentityShuffles(Idx, Op0, Op1, Indices[Idx], RootVec, MaxRecurse);
     RootVec = Res.first;
-    ReplaceInRootVec = Res.second;
+    ReplacementCandidate = Res.second;
 
     // We can't replace a widening/narrowing shuffle with one of its operands.
     if (!RootVec || RootVec->getType() != RetTy)
@@ -5533,8 +5536,8 @@ static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
 
   // Apply any replacements in RootVec that are applicable in case we're looking
   // through a cast or binop.
-  if (ReplaceInRootVec) {
-    auto [ReplacementSrc, ReplacementDst] = *ReplaceInRootVec;
+  if (ReplacementCandidate) {
+    auto [ReplacementSrc, ReplacementDst] = *ReplacementCandidate;
     ReplacementSrc->replaceAllUsesWith(ReplacementDst);
   }
   return RootVec;



More information about the llvm-commits mailing list