[llvm] [CostModel][X86] Add missing AVX1 costs for PMULUDQ v4i64 pattern (PR #157475)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 8 07:33:46 PDT 2025
github-actions[bot] wrote:
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff origin/main HEAD --extensions cpp -- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
``````````
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
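For example, here is a minimal sketch of that adjustment, assuming a stacked-PR workflow whose base branch is named `users/me/base-pr` (a placeholder, not a branch from this PR):
``````````bash
# Hypothetical example: compare against the stack's base branch instead of
# origin/main, so the diff only covers this PR's changes.
git-clang-format --diff users/me/base-pr HEAD --extensions cpp -- \
  llvm/lib/Target/X86/X86TargetTransformInfo.cpp
``````````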
:warning:
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 3d8d0a236..9d5fc327a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1196,101 +1196,141 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
return LT.first * *KindCost;
static const CostKindTblEntry AVX1CostTable[] = {
- // We don't have to scalarize unsupported ops. We can issue two half-sized
- // operations and we only need to extract the upper YMM half.
- // Two ops + 1 extract + 1 insert = 4.
- { ISD::MUL, MVT::v32i8, { 10, 11, 18, 19 } }, // pmaddubsw + split
- { ISD::MUL, MVT::v16i8, { 5, 6, 8, 12 } }, // 2*pmaddubsw/3*and/psllw/or
- { ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } }, // pmullw + split
- { ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } }, // pmulld + split
- { ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } }, // pmulld
- { ISD::MUL, MVT::v4i64, { 12, 15, 19, 20 } },
-
- { X86ISD::PMULUDQ, MVT::v4i64, { 3, 5, 5, 6 } }, // pmuludq + split
-
- { ISD::AND, MVT::v32i8, { 1, 1, 1, 2 } }, // vandps
- { ISD::AND, MVT::v16i16, { 1, 1, 1, 2 } }, // vandps
- { ISD::AND, MVT::v8i32, { 1, 1, 1, 2 } }, // vandps
- { ISD::AND, MVT::v4i64, { 1, 1, 1, 2 } }, // vandps
-
- { ISD::OR, MVT::v32i8, { 1, 1, 1, 2 } }, // vorps
- { ISD::OR, MVT::v16i16, { 1, 1, 1, 2 } }, // vorps
- { ISD::OR, MVT::v8i32, { 1, 1, 1, 2 } }, // vorps
- { ISD::OR, MVT::v4i64, { 1, 1, 1, 2 } }, // vorps
-
- { ISD::XOR, MVT::v32i8, { 1, 1, 1, 2 } }, // vxorps
- { ISD::XOR, MVT::v16i16, { 1, 1, 1, 2 } }, // vxorps
- { ISD::XOR, MVT::v8i32, { 1, 1, 1, 2 } }, // vxorps
- { ISD::XOR, MVT::v4i64, { 1, 1, 1, 2 } }, // vxorps
-
- { ISD::SUB, MVT::v32i8, { 4, 2, 5, 6 } }, // psubb + split
- { ISD::ADD, MVT::v32i8, { 4, 2, 5, 6 } }, // paddb + split
- { ISD::SUB, MVT::v16i16, { 4, 2, 5, 6 } }, // psubw + split
- { ISD::ADD, MVT::v16i16, { 4, 2, 5, 6 } }, // paddw + split
- { ISD::SUB, MVT::v8i32, { 4, 2, 5, 6 } }, // psubd + split
- { ISD::ADD, MVT::v8i32, { 4, 2, 5, 6 } }, // paddd + split
- { ISD::SUB, MVT::v4i64, { 4, 2, 5, 6 } }, // psubq + split
- { ISD::ADD, MVT::v4i64, { 4, 2, 5, 6 } }, // paddq + split
- { ISD::SUB, MVT::v2i64, { 1, 1, 1, 1 } }, // psubq
- { ISD::ADD, MVT::v2i64, { 1, 1, 1, 1 } }, // paddq
-
- { ISD::SHL, MVT::v16i8, { 10, 21,11,17 } }, // pblendvb sequence.
- { ISD::SHL, MVT::v32i8, { 22, 22,27,40 } }, // pblendvb sequence + split.
- { ISD::SHL, MVT::v8i16, { 6, 9,11,11 } }, // pblendvb sequence.
- { ISD::SHL, MVT::v16i16, { 13, 16,24,25 } }, // pblendvb sequence + split.
- { ISD::SHL, MVT::v4i32, { 3, 11, 4, 6 } }, // pslld/paddd/cvttps2dq/pmulld
- { ISD::SHL, MVT::v8i32, { 9, 11,12,17 } }, // pslld/paddd/cvttps2dq/pmulld + split
- { ISD::SHL, MVT::v2i64, { 2, 4, 4, 6 } }, // Shift each lane + blend.
- { ISD::SHL, MVT::v4i64, { 6, 7,11,15 } }, // Shift each lane + blend + split.
-
- { ISD::SRL, MVT::v16i8, { 11, 27,12,18 } }, // pblendvb sequence.
- { ISD::SRL, MVT::v32i8, { 23, 23,30,43 } }, // pblendvb sequence + split.
- { ISD::SRL, MVT::v8i16, { 13, 16,14,22 } }, // pblendvb sequence.
- { ISD::SRL, MVT::v16i16, { 28, 30,31,48 } }, // pblendvb sequence + split.
- { ISD::SRL, MVT::v4i32, { 6, 7,12,16 } }, // Shift each lane + blend.
- { ISD::SRL, MVT::v8i32, { 14, 14,26,34 } }, // Shift each lane + blend + split.
- { ISD::SRL, MVT::v2i64, { 2, 4, 4, 6 } }, // Shift each lane + blend.
- { ISD::SRL, MVT::v4i64, { 6, 7,11,15 } }, // Shift each lane + blend + split.
-
- { ISD::SRA, MVT::v16i8, { 21, 22,24,36 } }, // pblendvb sequence.
- { ISD::SRA, MVT::v32i8, { 44, 45,51,76 } }, // pblendvb sequence + split.
- { ISD::SRA, MVT::v8i16, { 13, 16,14,22 } }, // pblendvb sequence.
- { ISD::SRA, MVT::v16i16, { 28, 30,31,48 } }, // pblendvb sequence + split.
- { ISD::SRA, MVT::v4i32, { 6, 7,12,16 } }, // Shift each lane + blend.
- { ISD::SRA, MVT::v8i32, { 14, 14,26,34 } }, // Shift each lane + blend + split.
- { ISD::SRA, MVT::v2i64, { 5, 6,10,14 } }, // Shift each lane + blend.
- { ISD::SRA, MVT::v4i64, { 12, 12,22,30 } }, // Shift each lane + blend + split.
-
- { ISD::FNEG, MVT::v4f64, { 2, 2, 1, 2 } }, // BTVER2 from http://www.agner.org/
- { ISD::FNEG, MVT::v8f32, { 2, 2, 1, 2 } }, // BTVER2 from http://www.agner.org/
-
- { ISD::FADD, MVT::f64, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FADD, MVT::f32, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FADD, MVT::v2f64, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FADD, MVT::v4f32, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FADD, MVT::v4f64, { 2, 5, 1, 2 } }, // BDVER2 from http://www.agner.org/
- { ISD::FADD, MVT::v8f32, { 2, 5, 1, 2 } }, // BDVER2 from http://www.agner.org/
-
- { ISD::FSUB, MVT::f64, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FSUB, MVT::f32, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FSUB, MVT::v2f64, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FSUB, MVT::v4f32, { 1, 5, 1, 1 } }, // BDVER2 from http://www.agner.org/
- { ISD::FSUB, MVT::v4f64, { 2, 5, 1, 2 } }, // BDVER2 from http://www.agner.org/
- { ISD::FSUB, MVT::v8f32, { 2, 5, 1, 2 } }, // BDVER2 from http://www.agner.org/
-
- { ISD::FMUL, MVT::f64, { 2, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/
- { ISD::FMUL, MVT::f32, { 1, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/
- { ISD::FMUL, MVT::v2f64, { 2, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/
- { ISD::FMUL, MVT::v4f32, { 1, 5, 1, 1 } }, // BTVER2 from http://www.agner.org/
- { ISD::FMUL, MVT::v4f64, { 4, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/
- { ISD::FMUL, MVT::v8f32, { 2, 5, 1, 2 } }, // BTVER2 from http://www.agner.org/
-
- { ISD::FDIV, MVT::f32, { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, { 14, 14, 1, 1 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v8f32, { 28, 29, 1, 3 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::f64, { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, { 22, 22, 1, 1 } }, // SNB from http://www.agner.org/
- { ISD::FDIV, MVT::v4f64, { 44, 45, 1, 3 } }, // SNB from http://www.agner.org/
+ // We don't have to scalarize unsupported ops. We can issue two half-sized
+ // operations and we only need to extract the upper YMM half.
+ // Two ops + 1 extract + 1 insert = 4.
+ {ISD::MUL, MVT::v32i8, {10, 11, 18, 19}}, // pmaddubsw + split
+ {ISD::MUL, MVT::v16i8, {5, 6, 8, 12}}, // 2*pmaddubsw/3*and/psllw/or
+ {ISD::MUL, MVT::v16i16, {4, 8, 5, 6}}, // pmullw + split
+ {ISD::MUL, MVT::v8i32, {5, 8, 5, 10}}, // pmulld + split
+ {ISD::MUL, MVT::v4i32, {2, 5, 1, 3}}, // pmulld
+ {ISD::MUL, MVT::v4i64, {12, 15, 19, 20}},
+
+ {X86ISD::PMULUDQ, MVT::v4i64, {3, 5, 5, 6}}, // pmuludq + split
+
+ {ISD::AND, MVT::v32i8, {1, 1, 1, 2}}, // vandps
+ {ISD::AND, MVT::v16i16, {1, 1, 1, 2}}, // vandps
+ {ISD::AND, MVT::v8i32, {1, 1, 1, 2}}, // vandps
+ {ISD::AND, MVT::v4i64, {1, 1, 1, 2}}, // vandps
+
+ {ISD::OR, MVT::v32i8, {1, 1, 1, 2}}, // vorps
+ {ISD::OR, MVT::v16i16, {1, 1, 1, 2}}, // vorps
+ {ISD::OR, MVT::v8i32, {1, 1, 1, 2}}, // vorps
+ {ISD::OR, MVT::v4i64, {1, 1, 1, 2}}, // vorps
+
+ {ISD::XOR, MVT::v32i8, {1, 1, 1, 2}}, // vxorps
+ {ISD::XOR, MVT::v16i16, {1, 1, 1, 2}}, // vxorps
+ {ISD::XOR, MVT::v8i32, {1, 1, 1, 2}}, // vxorps
+ {ISD::XOR, MVT::v4i64, {1, 1, 1, 2}}, // vxorps
+
+ {ISD::SUB, MVT::v32i8, {4, 2, 5, 6}}, // psubb + split
+ {ISD::ADD, MVT::v32i8, {4, 2, 5, 6}}, // paddb + split
+ {ISD::SUB, MVT::v16i16, {4, 2, 5, 6}}, // psubw + split
+ {ISD::ADD, MVT::v16i16, {4, 2, 5, 6}}, // paddw + split
+ {ISD::SUB, MVT::v8i32, {4, 2, 5, 6}}, // psubd + split
+ {ISD::ADD, MVT::v8i32, {4, 2, 5, 6}}, // paddd + split
+ {ISD::SUB, MVT::v4i64, {4, 2, 5, 6}}, // psubq + split
+ {ISD::ADD, MVT::v4i64, {4, 2, 5, 6}}, // paddq + split
+ {ISD::SUB, MVT::v2i64, {1, 1, 1, 1}}, // psubq
+ {ISD::ADD, MVT::v2i64, {1, 1, 1, 1}}, // paddq
+
+ {ISD::SHL, MVT::v16i8, {10, 21, 11, 17}}, // pblendvb sequence.
+ {ISD::SHL, MVT::v32i8, {22, 22, 27, 40}}, // pblendvb sequence + split.
+ {ISD::SHL, MVT::v8i16, {6, 9, 11, 11}}, // pblendvb sequence.
+ {ISD::SHL, MVT::v16i16, {13, 16, 24, 25}}, // pblendvb sequence + split.
+ {ISD::SHL, MVT::v4i32, {3, 11, 4, 6}}, // pslld/paddd/cvttps2dq/pmulld
+ {ISD::SHL,
+ MVT::v8i32,
+ {9, 11, 12, 17}}, // pslld/paddd/cvttps2dq/pmulld + split
+ {ISD::SHL, MVT::v2i64, {2, 4, 4, 6}}, // Shift each lane + blend.
+ {ISD::SHL,
+ MVT::v4i64,
+ {6, 7, 11, 15}}, // Shift each lane + blend + split.
+
+ {ISD::SRL, MVT::v16i8, {11, 27, 12, 18}}, // pblendvb sequence.
+ {ISD::SRL, MVT::v32i8, {23, 23, 30, 43}}, // pblendvb sequence + split.
+ {ISD::SRL, MVT::v8i16, {13, 16, 14, 22}}, // pblendvb sequence.
+ {ISD::SRL, MVT::v16i16, {28, 30, 31, 48}}, // pblendvb sequence + split.
+ {ISD::SRL, MVT::v4i32, {6, 7, 12, 16}}, // Shift each lane + blend.
+ {ISD::SRL,
+ MVT::v8i32,
+ {14, 14, 26, 34}}, // Shift each lane + blend + split.
+ {ISD::SRL, MVT::v2i64, {2, 4, 4, 6}}, // Shift each lane + blend.
+ {ISD::SRL,
+ MVT::v4i64,
+ {6, 7, 11, 15}}, // Shift each lane + blend + split.
+
+ {ISD::SRA, MVT::v16i8, {21, 22, 24, 36}}, // pblendvb sequence.
+ {ISD::SRA, MVT::v32i8, {44, 45, 51, 76}}, // pblendvb sequence + split.
+ {ISD::SRA, MVT::v8i16, {13, 16, 14, 22}}, // pblendvb sequence.
+ {ISD::SRA, MVT::v16i16, {28, 30, 31, 48}}, // pblendvb sequence + split.
+ {ISD::SRA, MVT::v4i32, {6, 7, 12, 16}}, // Shift each lane + blend.
+ {ISD::SRA,
+ MVT::v8i32,
+ {14, 14, 26, 34}}, // Shift each lane + blend + split.
+ {ISD::SRA, MVT::v2i64, {5, 6, 10, 14}}, // Shift each lane + blend.
+ {ISD::SRA,
+ MVT::v4i64,
+ {12, 12, 22, 30}}, // Shift each lane + blend + split.
+
+ {ISD::FNEG,
+ MVT::v4f64,
+ {2, 2, 1, 2}}, // BTVER2 from http://www.agner.org/
+ {ISD::FNEG,
+ MVT::v8f32,
+ {2, 2, 1, 2}}, // BTVER2 from http://www.agner.org/
+
+ {ISD::FADD, MVT::f64, {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FADD, MVT::f32, {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FADD,
+ MVT::v2f64,
+ {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FADD,
+ MVT::v4f32,
+ {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FADD,
+ MVT::v4f64,
+ {2, 5, 1, 2}}, // BDVER2 from http://www.agner.org/
+ {ISD::FADD,
+ MVT::v8f32,
+ {2, 5, 1, 2}}, // BDVER2 from http://www.agner.org/
+
+ {ISD::FSUB, MVT::f64, {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FSUB, MVT::f32, {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FSUB,
+ MVT::v2f64,
+ {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FSUB,
+ MVT::v4f32,
+ {1, 5, 1, 1}}, // BDVER2 from http://www.agner.org/
+ {ISD::FSUB,
+ MVT::v4f64,
+ {2, 5, 1, 2}}, // BDVER2 from http://www.agner.org/
+ {ISD::FSUB,
+ MVT::v8f32,
+ {2, 5, 1, 2}}, // BDVER2 from http://www.agner.org/
+
+ {ISD::FMUL, MVT::f64, {2, 5, 1, 1}}, // BTVER2 from http://www.agner.org/
+ {ISD::FMUL, MVT::f32, {1, 5, 1, 1}}, // BTVER2 from http://www.agner.org/
+ {ISD::FMUL,
+ MVT::v2f64,
+ {2, 5, 1, 1}}, // BTVER2 from http://www.agner.org/
+ {ISD::FMUL,
+ MVT::v4f32,
+ {1, 5, 1, 1}}, // BTVER2 from http://www.agner.org/
+ {ISD::FMUL,
+ MVT::v4f64,
+ {4, 5, 1, 2}}, // BTVER2 from http://www.agner.org/
+ {ISD::FMUL,
+ MVT::v8f32,
+ {2, 5, 1, 2}}, // BTVER2 from http://www.agner.org/
+
+ {ISD::FDIV, MVT::f32, {14, 14, 1, 1}}, // SNB from http://www.agner.org/
+ {ISD::FDIV, MVT::v4f32, {14, 14, 1, 1}}, // SNB from http://www.agner.org/
+ {ISD::FDIV, MVT::v8f32, {28, 29, 1, 3}}, // SNB from http://www.agner.org/
+ {ISD::FDIV, MVT::f64, {22, 22, 1, 1}}, // SNB from http://www.agner.org/
+ {ISD::FDIV, MVT::v2f64, {22, 22, 1, 1}}, // SNB from http://www.agner.org/
+ {ISD::FDIV, MVT::v4f64, {44, 45, 1, 3}}, // SNB from http://www.agner.org/
};
if (ST->hasAVX())
``````````
</details>
https://github.com/llvm/llvm-project/pull/157475