[llvm] 72142b9 - [SLP]Added a test for shuffled matched tree entries, NFC.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 14 10:07:50 PDT 2021
Author: Alexey Bataev
Date: 2021-04-14T10:07:26-07:00
New Revision: 72142b909d635d17bdbe3cb4823d97afb96c1d9e
URL: https://github.com/llvm/llvm-project/commit/72142b909d635d17bdbe3cb4823d97afb96c1d9e
DIFF: https://github.com/llvm/llvm-project/commit/72142b909d635d17bdbe3cb4823d97afb96c1d9e.diff
LOG: [SLP]Added a test for shuffled matched tree entries, NFC.
Added:
llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
new file mode 100644
index 0000000000000..1a4cbb16b9a91
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -0,0 +1,251 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -slp-threshold=50 -slp-recursion-max-depth=6 < %s | FileCheck %s
+
+define i32 @bar() local_unnamed_addr {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD103:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[SUB104:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[ADD105:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[SUB106:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[ADD103]], 15
+; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
+; CHECK-NEXT: [[MUL_I:%.*]] = mul nuw i32 [[AND_I]], 65535
+; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]]
+; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[MUL_I]]
+; CHECK-NEXT: [[SHR_I64:%.*]] = lshr i32 [[ADD105]], 15
+; CHECK-NEXT: [[AND_I65:%.*]] = and i32 [[SHR_I64]], 65537
+; CHECK-NEXT: [[MUL_I66:%.*]] = mul nuw i32 [[AND_I65]], 65535
+; CHECK-NEXT: [[ADD_I67:%.*]] = add i32 [[MUL_I66]], [[ADD105]]
+; CHECK-NEXT: [[XOR_I68:%.*]] = xor i32 [[ADD_I67]], [[MUL_I66]]
+; CHECK-NEXT: [[SHR_I69:%.*]] = lshr i32 [[SUB104]], 15
+; CHECK-NEXT: [[AND_I70:%.*]] = and i32 [[SHR_I69]], 65537
+; CHECK-NEXT: [[MUL_I71:%.*]] = mul nuw i32 [[AND_I70]], 65535
+; CHECK-NEXT: [[ADD_I72:%.*]] = add i32 [[MUL_I71]], [[SUB104]]
+; CHECK-NEXT: [[XOR_I73:%.*]] = xor i32 [[ADD_I72]], [[MUL_I71]]
+; CHECK-NEXT: [[SHR_I74:%.*]] = lshr i32 [[SUB106]], 15
+; CHECK-NEXT: [[AND_I75:%.*]] = and i32 [[SHR_I74]], 65537
+; CHECK-NEXT: [[MUL_I76:%.*]] = mul nuw i32 [[AND_I75]], 65535
+; CHECK-NEXT: [[ADD_I77:%.*]] = add i32 [[MUL_I76]], [[SUB106]]
+; CHECK-NEXT: [[XOR_I78:%.*]] = xor i32 [[ADD_I77]], [[MUL_I76]]
+; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I68]], [[XOR_I]]
+; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I73]]
+; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I78]]
+; CHECK-NEXT: [[ADD78_1:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[SUB86_1:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[ADD94_1:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[ADD103_1:%.*]] = add nsw i32 [[ADD94_1]], [[ADD78_1]]
+; CHECK-NEXT: [[SUB104_1:%.*]] = sub nsw i32 [[ADD78_1]], [[ADD94_1]]
+; CHECK-NEXT: [[ADD105_1:%.*]] = add nsw i32 [[SUB102_1]], [[SUB86_1]]
+; CHECK-NEXT: [[SUB106_1:%.*]] = sub nsw i32 [[SUB86_1]], [[SUB102_1]]
+; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[ADD103_1]], 15
+; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
+; CHECK-NEXT: [[MUL_I_1:%.*]] = mul nuw i32 [[AND_I_1]], 65535
+; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]]
+; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[MUL_I_1]]
+; CHECK-NEXT: [[SHR_I64_1:%.*]] = lshr i32 [[ADD105_1]], 15
+; CHECK-NEXT: [[AND_I65_1:%.*]] = and i32 [[SHR_I64_1]], 65537
+; CHECK-NEXT: [[MUL_I66_1:%.*]] = mul nuw i32 [[AND_I65_1]], 65535
+; CHECK-NEXT: [[ADD_I67_1:%.*]] = add i32 [[MUL_I66_1]], [[ADD105_1]]
+; CHECK-NEXT: [[XOR_I68_1:%.*]] = xor i32 [[ADD_I67_1]], [[MUL_I66_1]]
+; CHECK-NEXT: [[SHR_I69_1:%.*]] = lshr i32 [[SUB104_1]], 15
+; CHECK-NEXT: [[AND_I70_1:%.*]] = and i32 [[SHR_I69_1]], 65537
+; CHECK-NEXT: [[MUL_I71_1:%.*]] = mul nuw i32 [[AND_I70_1]], 65535
+; CHECK-NEXT: [[ADD_I72_1:%.*]] = add i32 [[MUL_I71_1]], [[SUB104_1]]
+; CHECK-NEXT: [[XOR_I73_1:%.*]] = xor i32 [[ADD_I72_1]], [[MUL_I71_1]]
+; CHECK-NEXT: [[SHR_I74_1:%.*]] = lshr i32 [[SUB106_1]], 15
+; CHECK-NEXT: [[AND_I75_1:%.*]] = and i32 [[SHR_I74_1]], 65537
+; CHECK-NEXT: [[MUL_I76_1:%.*]] = mul nuw i32 [[AND_I75_1]], 65535
+; CHECK-NEXT: [[ADD_I77_1:%.*]] = add i32 [[MUL_I76_1]], [[SUB106_1]]
+; CHECK-NEXT: [[XOR_I78_1:%.*]] = xor i32 [[ADD_I77_1]], [[MUL_I76_1]]
+; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I68_1]], [[ADD113]]
+; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]]
+; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I73_1]]
+; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I78_1]]
+; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[ADD103_2:%.*]] = add nsw i32 undef, [[ADD78_2]]
+; CHECK-NEXT: [[SUB104_2:%.*]] = sub nsw i32 [[ADD78_2]], undef
+; CHECK-NEXT: [[ADD105_2:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[SUB106_2:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[SHR_I_2:%.*]] = lshr i32 [[ADD103_2]], 15
+; CHECK-NEXT: [[AND_I_2:%.*]] = and i32 [[SHR_I_2]], 65537
+; CHECK-NEXT: [[MUL_I_2:%.*]] = mul nuw i32 [[AND_I_2]], 65535
+; CHECK-NEXT: [[ADD_I_2:%.*]] = add i32 [[MUL_I_2]], [[ADD103_2]]
+; CHECK-NEXT: [[XOR_I_2:%.*]] = xor i32 [[ADD_I_2]], [[MUL_I_2]]
+; CHECK-NEXT: [[SHR_I64_2:%.*]] = lshr i32 [[ADD105_2]], 15
+; CHECK-NEXT: [[AND_I65_2:%.*]] = and i32 [[SHR_I64_2]], 65537
+; CHECK-NEXT: [[MUL_I66_2:%.*]] = mul nuw i32 [[AND_I65_2]], 65535
+; CHECK-NEXT: [[ADD_I67_2:%.*]] = add i32 [[MUL_I66_2]], [[ADD105_2]]
+; CHECK-NEXT: [[XOR_I68_2:%.*]] = xor i32 [[ADD_I67_2]], [[MUL_I66_2]]
+; CHECK-NEXT: [[SHR_I69_2:%.*]] = lshr i32 [[SUB104_2]], 15
+; CHECK-NEXT: [[AND_I70_2:%.*]] = and i32 [[SHR_I69_2]], 65537
+; CHECK-NEXT: [[MUL_I71_2:%.*]] = mul nuw i32 [[AND_I70_2]], 65535
+; CHECK-NEXT: [[ADD_I72_2:%.*]] = add i32 [[MUL_I71_2]], [[SUB104_2]]
+; CHECK-NEXT: [[XOR_I73_2:%.*]] = xor i32 [[ADD_I72_2]], [[MUL_I71_2]]
+; CHECK-NEXT: [[SHR_I74_2:%.*]] = lshr i32 [[SUB106_2]], 15
+; CHECK-NEXT: [[AND_I75_2:%.*]] = and i32 [[SHR_I74_2]], 65537
+; CHECK-NEXT: [[MUL_I76_2:%.*]] = mul nuw i32 [[AND_I75_2]], 65535
+; CHECK-NEXT: [[ADD_I77_2:%.*]] = add i32 [[MUL_I76_2]], [[SUB106_2]]
+; CHECK-NEXT: [[XOR_I78_2:%.*]] = xor i32 [[ADD_I77_2]], [[MUL_I76_2]]
+; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I68_2]], [[ADD113_1]]
+; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[XOR_I_2]]
+; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[XOR_I73_2]]
+; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I78_2]]
+; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[ADD103_3:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: [[SUB104_3:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[ADD105_3:%.*]] = add nsw i32 [[SUB102_3]], undef
+; CHECK-NEXT: [[SUB106_3:%.*]] = sub nsw i32 undef, [[SUB102_3]]
+; CHECK-NEXT: [[SHR_I_3:%.*]] = lshr i32 [[ADD103_3]], 15
+; CHECK-NEXT: [[AND_I_3:%.*]] = and i32 [[SHR_I_3]], 65537
+; CHECK-NEXT: [[MUL_I_3:%.*]] = mul nuw i32 [[AND_I_3]], 65535
+; CHECK-NEXT: [[ADD_I_3:%.*]] = add i32 [[MUL_I_3]], [[ADD103_3]]
+; CHECK-NEXT: [[XOR_I_3:%.*]] = xor i32 [[ADD_I_3]], [[MUL_I_3]]
+; CHECK-NEXT: [[SHR_I64_3:%.*]] = lshr i32 [[ADD105_3]], 15
+; CHECK-NEXT: [[AND_I65_3:%.*]] = and i32 [[SHR_I64_3]], 65537
+; CHECK-NEXT: [[MUL_I66_3:%.*]] = mul nuw i32 [[AND_I65_3]], 65535
+; CHECK-NEXT: [[ADD_I67_3:%.*]] = add i32 [[MUL_I66_3]], [[ADD105_3]]
+; CHECK-NEXT: [[XOR_I68_3:%.*]] = xor i32 [[ADD_I67_3]], [[MUL_I66_3]]
+; CHECK-NEXT: [[SHR_I69_3:%.*]] = lshr i32 [[SUB104_3]], 15
+; CHECK-NEXT: [[AND_I70_3:%.*]] = and i32 [[SHR_I69_3]], 65537
+; CHECK-NEXT: [[MUL_I71_3:%.*]] = mul nuw i32 [[AND_I70_3]], 65535
+; CHECK-NEXT: [[ADD_I72_3:%.*]] = add i32 [[MUL_I71_3]], [[SUB104_3]]
+; CHECK-NEXT: [[XOR_I73_3:%.*]] = xor i32 [[ADD_I72_3]], [[MUL_I71_3]]
+; CHECK-NEXT: [[SHR_I74_3:%.*]] = lshr i32 [[SUB106_3]], 15
+; CHECK-NEXT: [[AND_I75_3:%.*]] = and i32 [[SHR_I74_3]], 65537
+; CHECK-NEXT: [[MUL_I76_3:%.*]] = mul nuw i32 [[AND_I75_3]], 65535
+; CHECK-NEXT: [[ADD_I77_3:%.*]] = add i32 [[MUL_I76_3]], [[SUB106_3]]
+; CHECK-NEXT: [[XOR_I78_3:%.*]] = xor i32 [[ADD_I77_3]], [[MUL_I76_3]]
+; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I68_3]], [[ADD113_2]]
+; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[XOR_I_3]]
+; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[XOR_I73_3]]
+; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I78_3]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD113_3]], 16
+; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]]
+; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1
+; CHECK-NEXT: ret i32 [[SHR120]]
+;
+entry:
+ %add103 = add nsw i32 undef, undef
+ %sub104 = sub nsw i32 undef, undef
+ %add105 = add nsw i32 undef, undef
+ %sub106 = sub nsw i32 undef, undef
+ %shr.i = lshr i32 %add103, 15
+ %and.i = and i32 %shr.i, 65537
+ %mul.i = mul nuw i32 %and.i, 65535
+ %add.i = add i32 %mul.i, %add103
+ %xor.i = xor i32 %add.i, %mul.i
+ %shr.i64 = lshr i32 %add105, 15
+ %and.i65 = and i32 %shr.i64, 65537
+ %mul.i66 = mul nuw i32 %and.i65, 65535
+ %add.i67 = add i32 %mul.i66, %add105
+ %xor.i68 = xor i32 %add.i67, %mul.i66
+ %shr.i69 = lshr i32 %sub104, 15
+ %and.i70 = and i32 %shr.i69, 65537
+ %mul.i71 = mul nuw i32 %and.i70, 65535
+ %add.i72 = add i32 %mul.i71, %sub104
+ %xor.i73 = xor i32 %add.i72, %mul.i71
+ %shr.i74 = lshr i32 %sub106, 15
+ %and.i75 = and i32 %shr.i74, 65537
+ %mul.i76 = mul nuw i32 %and.i75, 65535
+ %add.i77 = add i32 %mul.i76, %sub106
+ %xor.i78 = xor i32 %add.i77, %mul.i76
+ %add110 = add i32 %xor.i68, %xor.i
+ %add112 = add i32 %add110, %xor.i73
+ %add113 = add i32 %add112, %xor.i78
+ %add78.1 = add nsw i32 undef, undef
+ %sub86.1 = sub nsw i32 undef, undef
+ %add94.1 = add nsw i32 undef, undef
+ %sub102.1 = sub nsw i32 undef, undef
+ %add103.1 = add nsw i32 %add94.1, %add78.1
+ %sub104.1 = sub nsw i32 %add78.1, %add94.1
+ %add105.1 = add nsw i32 %sub102.1, %sub86.1
+ %sub106.1 = sub nsw i32 %sub86.1, %sub102.1
+ %shr.i.1 = lshr i32 %add103.1, 15
+ %and.i.1 = and i32 %shr.i.1, 65537
+ %mul.i.1 = mul nuw i32 %and.i.1, 65535
+ %add.i.1 = add i32 %mul.i.1, %add103.1
+ %xor.i.1 = xor i32 %add.i.1, %mul.i.1
+ %shr.i64.1 = lshr i32 %add105.1, 15
+ %and.i65.1 = and i32 %shr.i64.1, 65537
+ %mul.i66.1 = mul nuw i32 %and.i65.1, 65535
+ %add.i67.1 = add i32 %mul.i66.1, %add105.1
+ %xor.i68.1 = xor i32 %add.i67.1, %mul.i66.1
+ %shr.i69.1 = lshr i32 %sub104.1, 15
+ %and.i70.1 = and i32 %shr.i69.1, 65537
+ %mul.i71.1 = mul nuw i32 %and.i70.1, 65535
+ %add.i72.1 = add i32 %mul.i71.1, %sub104.1
+ %xor.i73.1 = xor i32 %add.i72.1, %mul.i71.1
+ %shr.i74.1 = lshr i32 %sub106.1, 15
+ %and.i75.1 = and i32 %shr.i74.1, 65537
+ %mul.i76.1 = mul nuw i32 %and.i75.1, 65535
+ %add.i77.1 = add i32 %mul.i76.1, %sub106.1
+ %xor.i78.1 = xor i32 %add.i77.1, %mul.i76.1
+ %add108.1 = add i32 %xor.i68.1, %add113
+ %add110.1 = add i32 %add108.1, %xor.i.1
+ %add112.1 = add i32 %add110.1, %xor.i73.1
+ %add113.1 = add i32 %add112.1, %xor.i78.1
+ %add78.2 = add nsw i32 undef, undef
+ %add103.2 = add nsw i32 undef, %add78.2
+ %sub104.2 = sub nsw i32 %add78.2, undef
+ %add105.2 = add nsw i32 undef, undef
+ %sub106.2 = sub nsw i32 undef, undef
+ %shr.i.2 = lshr i32 %add103.2, 15
+ %and.i.2 = and i32 %shr.i.2, 65537
+ %mul.i.2 = mul nuw i32 %and.i.2, 65535
+ %add.i.2 = add i32 %mul.i.2, %add103.2
+ %xor.i.2 = xor i32 %add.i.2, %mul.i.2
+ %shr.i64.2 = lshr i32 %add105.2, 15
+ %and.i65.2 = and i32 %shr.i64.2, 65537
+ %mul.i66.2 = mul nuw i32 %and.i65.2, 65535
+ %add.i67.2 = add i32 %mul.i66.2, %add105.2
+ %xor.i68.2 = xor i32 %add.i67.2, %mul.i66.2
+ %shr.i69.2 = lshr i32 %sub104.2, 15
+ %and.i70.2 = and i32 %shr.i69.2, 65537
+ %mul.i71.2 = mul nuw i32 %and.i70.2, 65535
+ %add.i72.2 = add i32 %mul.i71.2, %sub104.2
+ %xor.i73.2 = xor i32 %add.i72.2, %mul.i71.2
+ %shr.i74.2 = lshr i32 %sub106.2, 15
+ %and.i75.2 = and i32 %shr.i74.2, 65537
+ %mul.i76.2 = mul nuw i32 %and.i75.2, 65535
+ %add.i77.2 = add i32 %mul.i76.2, %sub106.2
+ %xor.i78.2 = xor i32 %add.i77.2, %mul.i76.2
+ %add108.2 = add i32 %xor.i68.2, %add113.1
+ %add110.2 = add i32 %add108.2, %xor.i.2
+ %add112.2 = add i32 %add110.2, %xor.i73.2
+ %add113.2 = add i32 %add112.2, %xor.i78.2
+ %sub102.3 = sub nsw i32 undef, undef
+ %add103.3 = add nsw i32 undef, undef
+ %sub104.3 = sub nsw i32 undef, undef
+ %add105.3 = add nsw i32 %sub102.3, undef
+ %sub106.3 = sub nsw i32 undef, %sub102.3
+ %shr.i.3 = lshr i32 %add103.3, 15
+ %and.i.3 = and i32 %shr.i.3, 65537
+ %mul.i.3 = mul nuw i32 %and.i.3, 65535
+ %add.i.3 = add i32 %mul.i.3, %add103.3
+ %xor.i.3 = xor i32 %add.i.3, %mul.i.3
+ %shr.i64.3 = lshr i32 %add105.3, 15
+ %and.i65.3 = and i32 %shr.i64.3, 65537
+ %mul.i66.3 = mul nuw i32 %and.i65.3, 65535
+ %add.i67.3 = add i32 %mul.i66.3, %add105.3
+ %xor.i68.3 = xor i32 %add.i67.3, %mul.i66.3
+ %shr.i69.3 = lshr i32 %sub104.3, 15
+ %and.i70.3 = and i32 %shr.i69.3, 65537
+ %mul.i71.3 = mul nuw i32 %and.i70.3, 65535
+ %add.i72.3 = add i32 %mul.i71.3, %sub104.3
+ %xor.i73.3 = xor i32 %add.i72.3, %mul.i71.3
+ %shr.i74.3 = lshr i32 %sub106.3, 15
+ %and.i75.3 = and i32 %shr.i74.3, 65537
+ %mul.i76.3 = mul nuw i32 %and.i75.3, 65535
+ %add.i77.3 = add i32 %mul.i76.3, %sub106.3
+ %xor.i78.3 = xor i32 %add.i77.3, %mul.i76.3
+ %add108.3 = add i32 %xor.i68.3, %add113.2
+ %add110.3 = add i32 %add108.3, %xor.i.3
+ %add112.3 = add i32 %add110.3, %xor.i73.3
+ %add113.3 = add i32 %add112.3, %xor.i78.3
+ %shr = lshr i32 %add113.3, 16
+ %add119 = add nuw nsw i32 undef, %shr
+ %shr120 = lshr i32 %add119, 1
+ ret i32 %shr120
+}
More information about the llvm-commits mailing list