[llvm] b5281af - [X86] Avoid returning the same shuffle operation for broadcast (#70592)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 29 06:55:05 PDT 2023
Author: Phoebe Wang
Date: 2023-10-29T21:55:00+08:00
New Revision: b5281afe42246680ede4d563227df3657d501028
URL: https://github.com/llvm/llvm-project/commit/b5281afe42246680ede4d563227df3657d501028
DIFF: https://github.com/llvm/llvm-project/commit/b5281afe42246680ede4d563227df3657d501028.diff
LOG: [X86] Avoid returning the same shuffle operation for broadcast (#70592)
This fixes a crash present since aab8b2eb080d, which generates a new pattern:
```
t35: v8i32 = xor t11, t14
t36: v8i32 = vector_shuffle<0,1,0,1,0,1,0,1> t35, undef:v8i32
```
The pattern exposed a bug introduced in f885c08034, which breaks element
widening but doesn't handle the broadcast case.
This patch only fixes the crash. I observed a performance regression
caused by the above patches in the test, which may need further
investigation.
Added:
llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6411f27da0776d4..18f6a695e4502e9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15293,6 +15293,12 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
for (int i = 0; i != NumElts; i += NumBroadcastElts)
for (int j = 0; j != NumBroadcastElts; ++j)
BroadcastMask[i + j] = j;
+
+ // Avoid returning the same shuffle operation. For example,
+ // v8i32 = vector_shuffle<0,1,0,1,0,1,0,1> t5, undef:v8i32
+ if (BroadcastMask == Mask)
+ return SDValue();
+
return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
BroadcastMask);
}
diff --git a/llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll b/llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll
new file mode 100644
index 000000000000000..f012c05a095731e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,avx512vl | FileCheck %s
+
+define i1 @test(ptr %q) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
+; CHECK-NEXT: vptest %ymm0, %ymm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+entry:
+ %0 = load i64, ptr %q, align 8
+ %add = add nsw i64 %0, 0
+ %add2 = add nsw i64 %add, 0
+ %add5 = add nsw i64 %add2, 0
+ %vecinit1.i.i68 = insertelement <2 x i64> poison, i64 %add5, i64 0
+ %add8 = add nsw i64 %add5, 0
+ %vecinit.i.i55 = insertelement <4 x i64> undef, i64 %add8, i64 0
+ %1 = bitcast <2 x i64> %vecinit1.i.i68 to <4 x i32>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %3 = bitcast <4 x i64> %vecinit.i.i55 to <8 x i32>
+ %4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %5 = icmp ne <8 x i32> %2, %4
+ %6 = bitcast <8 x i1> %5 to i8
+ %7 = icmp eq i8 %6, 0
+ ret i1 %7
+}
More information about the llvm-commits
mailing list