[llvm] 8bf624a - [RISCV] Key VectorIntrinsicCostTable by SEW [nfc-ish]

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 17:11:03 PST 2024


Author: Philip Reames
Date: 2024-01-18T17:10:56-08:00
New Revision: 8bf624af4776cdaad82c0fc4da9e9b0f14e9ea10

URL: https://github.com/llvm/llvm-project/commit/8bf624af4776cdaad82c0fc4da9e9b0f14e9ea10
DIFF: https://github.com/llvm/llvm-project/commit/8bf624af4776cdaad82c0fc4da9e9b0f14e9ea10.diff

LOG: [RISCV] Key VectorIntrinsicCostTable by SEW [nfc-ish]

Previously, the table was keyed by the full vector type, but every type sharing a common element type was assigned the same cost. Unless an entry had been missed, this means the lookup was effectively keyed by SEW (the selected element width) already.

Restructure the table to make this SEW dependence more explicit, and in the process greatly reduce the size of the table.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 056b947c7a9f90..4ea3a519308995 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -656,494 +656,61 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
 // instruction counts with the following adjustments made:
 // * One vsetvli is considered free.
 static const CostTblEntry VectorIntrinsicCostTable[]{
-    {Intrinsic::floor, MVT::v2f32, 9},
-    {Intrinsic::floor, MVT::v4f32, 9},
-    {Intrinsic::floor, MVT::v8f32, 9},
-    {Intrinsic::floor, MVT::v16f32, 9},
-    {Intrinsic::floor, MVT::nxv1f32, 9},
-    {Intrinsic::floor, MVT::nxv2f32, 9},
-    {Intrinsic::floor, MVT::nxv4f32, 9},
-    {Intrinsic::floor, MVT::nxv8f32, 9},
-    {Intrinsic::floor, MVT::nxv16f32, 9},
-    {Intrinsic::floor, MVT::v2f64, 9},
-    {Intrinsic::floor, MVT::v4f64, 9},
-    {Intrinsic::floor, MVT::v8f64, 9},
-    {Intrinsic::floor, MVT::v16f64, 9},
-    {Intrinsic::floor, MVT::nxv1f64, 9},
-    {Intrinsic::floor, MVT::nxv2f64, 9},
-    {Intrinsic::floor, MVT::nxv4f64, 9},
-    {Intrinsic::floor, MVT::nxv8f64, 9},
-    {Intrinsic::ceil, MVT::v2f32, 9},
-    {Intrinsic::ceil, MVT::v4f32, 9},
-    {Intrinsic::ceil, MVT::v8f32, 9},
-    {Intrinsic::ceil, MVT::v16f32, 9},
-    {Intrinsic::ceil, MVT::nxv1f32, 9},
-    {Intrinsic::ceil, MVT::nxv2f32, 9},
-    {Intrinsic::ceil, MVT::nxv4f32, 9},
-    {Intrinsic::ceil, MVT::nxv8f32, 9},
-    {Intrinsic::ceil, MVT::nxv16f32, 9},
-    {Intrinsic::ceil, MVT::v2f64, 9},
-    {Intrinsic::ceil, MVT::v4f64, 9},
-    {Intrinsic::ceil, MVT::v8f64, 9},
-    {Intrinsic::ceil, MVT::v16f64, 9},
-    {Intrinsic::ceil, MVT::nxv1f64, 9},
-    {Intrinsic::ceil, MVT::nxv2f64, 9},
-    {Intrinsic::ceil, MVT::nxv4f64, 9},
-    {Intrinsic::ceil, MVT::nxv8f64, 9},
-    {Intrinsic::trunc, MVT::v2f32, 7},
-    {Intrinsic::trunc, MVT::v4f32, 7},
-    {Intrinsic::trunc, MVT::v8f32, 7},
-    {Intrinsic::trunc, MVT::v16f32, 7},
-    {Intrinsic::trunc, MVT::nxv1f32, 7},
-    {Intrinsic::trunc, MVT::nxv2f32, 7},
-    {Intrinsic::trunc, MVT::nxv4f32, 7},
-    {Intrinsic::trunc, MVT::nxv8f32, 7},
-    {Intrinsic::trunc, MVT::nxv16f32, 7},
-    {Intrinsic::trunc, MVT::v2f64, 7},
-    {Intrinsic::trunc, MVT::v4f64, 7},
-    {Intrinsic::trunc, MVT::v8f64, 7},
-    {Intrinsic::trunc, MVT::v16f64, 7},
-    {Intrinsic::trunc, MVT::nxv1f64, 7},
-    {Intrinsic::trunc, MVT::nxv2f64, 7},
-    {Intrinsic::trunc, MVT::nxv4f64, 7},
-    {Intrinsic::trunc, MVT::nxv8f64, 7},
-    {Intrinsic::round, MVT::v2f32, 9},
-    {Intrinsic::round, MVT::v4f32, 9},
-    {Intrinsic::round, MVT::v8f32, 9},
-    {Intrinsic::round, MVT::v16f32, 9},
-    {Intrinsic::round, MVT::nxv1f32, 9},
-    {Intrinsic::round, MVT::nxv2f32, 9},
-    {Intrinsic::round, MVT::nxv4f32, 9},
-    {Intrinsic::round, MVT::nxv8f32, 9},
-    {Intrinsic::round, MVT::nxv16f32, 9},
-    {Intrinsic::round, MVT::v2f64, 9},
-    {Intrinsic::round, MVT::v4f64, 9},
-    {Intrinsic::round, MVT::v8f64, 9},
-    {Intrinsic::round, MVT::v16f64, 9},
-    {Intrinsic::round, MVT::nxv1f64, 9},
-    {Intrinsic::round, MVT::nxv2f64, 9},
-    {Intrinsic::round, MVT::nxv4f64, 9},
-    {Intrinsic::round, MVT::nxv8f64, 9},
-    {Intrinsic::roundeven, MVT::v2f32, 9},
-    {Intrinsic::roundeven, MVT::v4f32, 9},
-    {Intrinsic::roundeven, MVT::v8f32, 9},
-    {Intrinsic::roundeven, MVT::v16f32, 9},
-    {Intrinsic::roundeven, MVT::nxv1f32, 9},
-    {Intrinsic::roundeven, MVT::nxv2f32, 9},
-    {Intrinsic::roundeven, MVT::nxv4f32, 9},
-    {Intrinsic::roundeven, MVT::nxv8f32, 9},
-    {Intrinsic::roundeven, MVT::nxv16f32, 9},
-    {Intrinsic::roundeven, MVT::v2f64, 9},
-    {Intrinsic::roundeven, MVT::v4f64, 9},
-    {Intrinsic::roundeven, MVT::v8f64, 9},
-    {Intrinsic::roundeven, MVT::v16f64, 9},
-    {Intrinsic::roundeven, MVT::nxv1f64, 9},
-    {Intrinsic::roundeven, MVT::nxv2f64, 9},
-    {Intrinsic::roundeven, MVT::nxv4f64, 9},
-    {Intrinsic::roundeven, MVT::nxv8f64, 9},
-    {Intrinsic::rint, MVT::v2f32, 7},
-    {Intrinsic::rint, MVT::v4f32, 7},
-    {Intrinsic::rint, MVT::v8f32, 7},
-    {Intrinsic::rint, MVT::v16f32, 7},
-    {Intrinsic::rint, MVT::nxv1f32, 7},
-    {Intrinsic::rint, MVT::nxv2f32, 7},
-    {Intrinsic::rint, MVT::nxv4f32, 7},
-    {Intrinsic::rint, MVT::nxv8f32, 7},
-    {Intrinsic::rint, MVT::nxv16f32, 7},
-    {Intrinsic::rint, MVT::v2f64, 7},
-    {Intrinsic::rint, MVT::v4f64, 7},
-    {Intrinsic::rint, MVT::v8f64, 7},
-    {Intrinsic::rint, MVT::v16f64, 7},
-    {Intrinsic::rint, MVT::nxv1f64, 7},
-    {Intrinsic::rint, MVT::nxv2f64, 7},
-    {Intrinsic::rint, MVT::nxv4f64, 7},
-    {Intrinsic::rint, MVT::nxv8f64, 7},
-    {Intrinsic::lrint, MVT::v2i32, 1},
-    {Intrinsic::lrint, MVT::v4i32, 1},
-    {Intrinsic::lrint, MVT::v8i32, 1},
-    {Intrinsic::lrint, MVT::v16i32, 1},
-    {Intrinsic::lrint, MVT::nxv1i32, 1},
-    {Intrinsic::lrint, MVT::nxv2i32, 1},
-    {Intrinsic::lrint, MVT::nxv4i32, 1},
-    {Intrinsic::lrint, MVT::nxv8i32, 1},
-    {Intrinsic::lrint, MVT::nxv16i32, 1},
-    {Intrinsic::lrint, MVT::v2i64, 1},
-    {Intrinsic::lrint, MVT::v4i64, 1},
-    {Intrinsic::lrint, MVT::v8i64, 1},
-    {Intrinsic::lrint, MVT::v16i64, 1},
-    {Intrinsic::lrint, MVT::nxv1i64, 1},
-    {Intrinsic::lrint, MVT::nxv2i64, 1},
-    {Intrinsic::lrint, MVT::nxv4i64, 1},
-    {Intrinsic::lrint, MVT::nxv8i64, 1},
-    {Intrinsic::llrint, MVT::v2i64, 1},
-    {Intrinsic::llrint, MVT::v4i64, 1},
-    {Intrinsic::llrint, MVT::v8i64, 1},
-    {Intrinsic::llrint, MVT::v16i64, 1},
-    {Intrinsic::llrint, MVT::nxv1i64, 1},
-    {Intrinsic::llrint, MVT::nxv2i64, 1},
-    {Intrinsic::llrint, MVT::nxv4i64, 1},
-    {Intrinsic::llrint, MVT::nxv8i64, 1},
-    {Intrinsic::nearbyint, MVT::v2f32, 9},
-    {Intrinsic::nearbyint, MVT::v4f32, 9},
-    {Intrinsic::nearbyint, MVT::v8f32, 9},
-    {Intrinsic::nearbyint, MVT::v16f32, 9},
-    {Intrinsic::nearbyint, MVT::nxv1f32, 9},
-    {Intrinsic::nearbyint, MVT::nxv2f32, 9},
-    {Intrinsic::nearbyint, MVT::nxv4f32, 9},
-    {Intrinsic::nearbyint, MVT::nxv8f32, 9},
-    {Intrinsic::nearbyint, MVT::nxv16f32, 9},
-    {Intrinsic::nearbyint, MVT::v2f64, 9},
-    {Intrinsic::nearbyint, MVT::v4f64, 9},
-    {Intrinsic::nearbyint, MVT::v8f64, 9},
-    {Intrinsic::nearbyint, MVT::v16f64, 9},
-    {Intrinsic::nearbyint, MVT::nxv1f64, 9},
-    {Intrinsic::nearbyint, MVT::nxv2f64, 9},
-    {Intrinsic::nearbyint, MVT::nxv4f64, 9},
-    {Intrinsic::nearbyint, MVT::nxv8f64, 9},
-    {Intrinsic::bswap, MVT::v2i16, 3},
-    {Intrinsic::bswap, MVT::v4i16, 3},
-    {Intrinsic::bswap, MVT::v8i16, 3},
-    {Intrinsic::bswap, MVT::v16i16, 3},
-    {Intrinsic::bswap, MVT::nxv1i16, 3},
-    {Intrinsic::bswap, MVT::nxv2i16, 3},
-    {Intrinsic::bswap, MVT::nxv4i16, 3},
-    {Intrinsic::bswap, MVT::nxv8i16, 3},
-    {Intrinsic::bswap, MVT::nxv16i16, 3},
-    {Intrinsic::bswap, MVT::v2i32, 12},
-    {Intrinsic::bswap, MVT::v4i32, 12},
-    {Intrinsic::bswap, MVT::v8i32, 12},
-    {Intrinsic::bswap, MVT::v16i32, 12},
-    {Intrinsic::bswap, MVT::nxv1i32, 12},
-    {Intrinsic::bswap, MVT::nxv2i32, 12},
-    {Intrinsic::bswap, MVT::nxv4i32, 12},
-    {Intrinsic::bswap, MVT::nxv8i32, 12},
-    {Intrinsic::bswap, MVT::nxv16i32, 12},
-    {Intrinsic::bswap, MVT::v2i64, 31},
-    {Intrinsic::bswap, MVT::v4i64, 31},
-    {Intrinsic::bswap, MVT::v8i64, 31},
-    {Intrinsic::bswap, MVT::v16i64, 31},
-    {Intrinsic::bswap, MVT::nxv1i64, 31},
-    {Intrinsic::bswap, MVT::nxv2i64, 31},
-    {Intrinsic::bswap, MVT::nxv4i64, 31},
-    {Intrinsic::bswap, MVT::nxv8i64, 31},
-    {Intrinsic::vp_bswap, MVT::v2i16, 3},
-    {Intrinsic::vp_bswap, MVT::v4i16, 3},
-    {Intrinsic::vp_bswap, MVT::v8i16, 3},
-    {Intrinsic::vp_bswap, MVT::v16i16, 3},
-    {Intrinsic::vp_bswap, MVT::nxv1i16, 3},
-    {Intrinsic::vp_bswap, MVT::nxv2i16, 3},
-    {Intrinsic::vp_bswap, MVT::nxv4i16, 3},
-    {Intrinsic::vp_bswap, MVT::nxv8i16, 3},
-    {Intrinsic::vp_bswap, MVT::nxv16i16, 3},
-    {Intrinsic::vp_bswap, MVT::v2i32, 12},
-    {Intrinsic::vp_bswap, MVT::v4i32, 12},
-    {Intrinsic::vp_bswap, MVT::v8i32, 12},
-    {Intrinsic::vp_bswap, MVT::v16i32, 12},
-    {Intrinsic::vp_bswap, MVT::nxv1i32, 12},
-    {Intrinsic::vp_bswap, MVT::nxv2i32, 12},
-    {Intrinsic::vp_bswap, MVT::nxv4i32, 12},
-    {Intrinsic::vp_bswap, MVT::nxv8i32, 12},
-    {Intrinsic::vp_bswap, MVT::nxv16i32, 12},
-    {Intrinsic::vp_bswap, MVT::v2i64, 31},
-    {Intrinsic::vp_bswap, MVT::v4i64, 31},
-    {Intrinsic::vp_bswap, MVT::v8i64, 31},
-    {Intrinsic::vp_bswap, MVT::v16i64, 31},
-    {Intrinsic::vp_bswap, MVT::nxv1i64, 31},
-    {Intrinsic::vp_bswap, MVT::nxv2i64, 31},
-    {Intrinsic::vp_bswap, MVT::nxv4i64, 31},
-    {Intrinsic::vp_bswap, MVT::nxv8i64, 31},
-    {Intrinsic::vp_fshl, MVT::v2i8, 7},
-    {Intrinsic::vp_fshl, MVT::v4i8, 7},
-    {Intrinsic::vp_fshl, MVT::v8i8, 7},
-    {Intrinsic::vp_fshl, MVT::v16i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv1i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv2i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv4i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv8i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv16i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv32i8, 7},
-    {Intrinsic::vp_fshl, MVT::nxv64i8, 7},
-    {Intrinsic::vp_fshl, MVT::v2i16, 7},
-    {Intrinsic::vp_fshl, MVT::v4i16, 7},
-    {Intrinsic::vp_fshl, MVT::v8i16, 7},
-    {Intrinsic::vp_fshl, MVT::v16i16, 7},
-    {Intrinsic::vp_fshl, MVT::nxv1i16, 7},
-    {Intrinsic::vp_fshl, MVT::nxv2i16, 7},
-    {Intrinsic::vp_fshl, MVT::nxv4i16, 7},
-    {Intrinsic::vp_fshl, MVT::nxv8i16, 7},
-    {Intrinsic::vp_fshl, MVT::nxv16i16, 7},
-    {Intrinsic::vp_fshl, MVT::nxv32i16, 7},
-    {Intrinsic::vp_fshl, MVT::v2i32, 7},
-    {Intrinsic::vp_fshl, MVT::v4i32, 7},
-    {Intrinsic::vp_fshl, MVT::v8i32, 7},
-    {Intrinsic::vp_fshl, MVT::v16i32, 7},
-    {Intrinsic::vp_fshl, MVT::nxv1i32, 7},
-    {Intrinsic::vp_fshl, MVT::nxv2i32, 7},
-    {Intrinsic::vp_fshl, MVT::nxv4i32, 7},
-    {Intrinsic::vp_fshl, MVT::nxv8i32, 7},
-    {Intrinsic::vp_fshl, MVT::nxv16i32, 7},
-    {Intrinsic::vp_fshl, MVT::v2i64, 7},
-    {Intrinsic::vp_fshl, MVT::v4i64, 7},
-    {Intrinsic::vp_fshl, MVT::v8i64, 7},
-    {Intrinsic::vp_fshl, MVT::v16i64, 7},
-    {Intrinsic::vp_fshl, MVT::nxv1i64, 7},
-    {Intrinsic::vp_fshl, MVT::nxv2i64, 7},
-    {Intrinsic::vp_fshl, MVT::nxv4i64, 7},
-    {Intrinsic::vp_fshl, MVT::nxv8i64, 7},
-    {Intrinsic::vp_fshr, MVT::v2i8, 7},
-    {Intrinsic::vp_fshr, MVT::v4i8, 7},
-    {Intrinsic::vp_fshr, MVT::v8i8, 7},
-    {Intrinsic::vp_fshr, MVT::v16i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv1i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv2i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv4i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv8i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv16i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv32i8, 7},
-    {Intrinsic::vp_fshr, MVT::nxv64i8, 7},
-    {Intrinsic::vp_fshr, MVT::v2i16, 7},
-    {Intrinsic::vp_fshr, MVT::v4i16, 7},
-    {Intrinsic::vp_fshr, MVT::v8i16, 7},
-    {Intrinsic::vp_fshr, MVT::v16i16, 7},
-    {Intrinsic::vp_fshr, MVT::nxv1i16, 7},
-    {Intrinsic::vp_fshr, MVT::nxv2i16, 7},
-    {Intrinsic::vp_fshr, MVT::nxv4i16, 7},
-    {Intrinsic::vp_fshr, MVT::nxv8i16, 7},
-    {Intrinsic::vp_fshr, MVT::nxv16i16, 7},
-    {Intrinsic::vp_fshr, MVT::nxv32i16, 7},
-    {Intrinsic::vp_fshr, MVT::v2i32, 7},
-    {Intrinsic::vp_fshr, MVT::v4i32, 7},
-    {Intrinsic::vp_fshr, MVT::v8i32, 7},
-    {Intrinsic::vp_fshr, MVT::v16i32, 7},
-    {Intrinsic::vp_fshr, MVT::nxv1i32, 7},
-    {Intrinsic::vp_fshr, MVT::nxv2i32, 7},
-    {Intrinsic::vp_fshr, MVT::nxv4i32, 7},
-    {Intrinsic::vp_fshr, MVT::nxv8i32, 7},
-    {Intrinsic::vp_fshr, MVT::nxv16i32, 7},
-    {Intrinsic::vp_fshr, MVT::v2i64, 7},
-    {Intrinsic::vp_fshr, MVT::v4i64, 7},
-    {Intrinsic::vp_fshr, MVT::v8i64, 7},
-    {Intrinsic::vp_fshr, MVT::v16i64, 7},
-    {Intrinsic::vp_fshr, MVT::nxv1i64, 7},
-    {Intrinsic::vp_fshr, MVT::nxv2i64, 7},
-    {Intrinsic::vp_fshr, MVT::nxv4i64, 7},
-    {Intrinsic::vp_fshr, MVT::nxv8i64, 7},
-    {Intrinsic::bitreverse, MVT::v2i8, 17},
-    {Intrinsic::bitreverse, MVT::v4i8, 17},
-    {Intrinsic::bitreverse, MVT::v8i8, 17},
-    {Intrinsic::bitreverse, MVT::v16i8, 17},
-    {Intrinsic::bitreverse, MVT::nxv1i8, 17},
-    {Intrinsic::bitreverse, MVT::nxv2i8, 17},
-    {Intrinsic::bitreverse, MVT::nxv4i8, 17},
-    {Intrinsic::bitreverse, MVT::nxv8i8, 17},
-    {Intrinsic::bitreverse, MVT::nxv16i8, 17},
-    {Intrinsic::bitreverse, MVT::v2i16, 24},
-    {Intrinsic::bitreverse, MVT::v4i16, 24},
-    {Intrinsic::bitreverse, MVT::v8i16, 24},
-    {Intrinsic::bitreverse, MVT::v16i16, 24},
-    {Intrinsic::bitreverse, MVT::nxv1i16, 24},
-    {Intrinsic::bitreverse, MVT::nxv2i16, 24},
-    {Intrinsic::bitreverse, MVT::nxv4i16, 24},
-    {Intrinsic::bitreverse, MVT::nxv8i16, 24},
-    {Intrinsic::bitreverse, MVT::nxv16i16, 24},
-    {Intrinsic::bitreverse, MVT::v2i32, 33},
-    {Intrinsic::bitreverse, MVT::v4i32, 33},
-    {Intrinsic::bitreverse, MVT::v8i32, 33},
-    {Intrinsic::bitreverse, MVT::v16i32, 33},
-    {Intrinsic::bitreverse, MVT::nxv1i32, 33},
-    {Intrinsic::bitreverse, MVT::nxv2i32, 33},
-    {Intrinsic::bitreverse, MVT::nxv4i32, 33},
-    {Intrinsic::bitreverse, MVT::nxv8i32, 33},
-    {Intrinsic::bitreverse, MVT::nxv16i32, 33},
-    {Intrinsic::bitreverse, MVT::v2i64, 52},
-    {Intrinsic::bitreverse, MVT::v4i64, 52},
-    {Intrinsic::bitreverse, MVT::v8i64, 52},
-    {Intrinsic::bitreverse, MVT::v16i64, 52},
-    {Intrinsic::bitreverse, MVT::nxv1i64, 52},
-    {Intrinsic::bitreverse, MVT::nxv2i64, 52},
-    {Intrinsic::bitreverse, MVT::nxv4i64, 52},
-    {Intrinsic::bitreverse, MVT::nxv8i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::v2i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::v4i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::v8i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::v16i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::nxv1i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::nxv2i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::nxv4i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::nxv8i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::nxv16i8, 17},
-    {Intrinsic::vp_bitreverse, MVT::v2i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::v4i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::v8i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::v16i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::nxv1i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::nxv2i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::nxv4i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::nxv8i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::nxv16i16, 24},
-    {Intrinsic::vp_bitreverse, MVT::v2i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::v4i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::v8i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::v16i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::nxv1i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::nxv2i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::nxv4i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::nxv8i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::nxv16i32, 33},
-    {Intrinsic::vp_bitreverse, MVT::v2i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::v4i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::v8i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::v16i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::nxv1i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::nxv2i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::nxv4i64, 52},
-    {Intrinsic::vp_bitreverse, MVT::nxv8i64, 52},
-    {Intrinsic::ctpop, MVT::v2i8, 12},
-    {Intrinsic::ctpop, MVT::v4i8, 12},
-    {Intrinsic::ctpop, MVT::v8i8, 12},
-    {Intrinsic::ctpop, MVT::v16i8, 12},
-    {Intrinsic::ctpop, MVT::nxv1i8, 12},
-    {Intrinsic::ctpop, MVT::nxv2i8, 12},
-    {Intrinsic::ctpop, MVT::nxv4i8, 12},
-    {Intrinsic::ctpop, MVT::nxv8i8, 12},
-    {Intrinsic::ctpop, MVT::nxv16i8, 12},
-    {Intrinsic::ctpop, MVT::v2i16, 19},
-    {Intrinsic::ctpop, MVT::v4i16, 19},
-    {Intrinsic::ctpop, MVT::v8i16, 19},
-    {Intrinsic::ctpop, MVT::v16i16, 19},
-    {Intrinsic::ctpop, MVT::nxv1i16, 19},
-    {Intrinsic::ctpop, MVT::nxv2i16, 19},
-    {Intrinsic::ctpop, MVT::nxv4i16, 19},
-    {Intrinsic::ctpop, MVT::nxv8i16, 19},
-    {Intrinsic::ctpop, MVT::nxv16i16, 19},
-    {Intrinsic::ctpop, MVT::v2i32, 20},
-    {Intrinsic::ctpop, MVT::v4i32, 20},
-    {Intrinsic::ctpop, MVT::v8i32, 20},
-    {Intrinsic::ctpop, MVT::v16i32, 20},
-    {Intrinsic::ctpop, MVT::nxv1i32, 20},
-    {Intrinsic::ctpop, MVT::nxv2i32, 20},
-    {Intrinsic::ctpop, MVT::nxv4i32, 20},
-    {Intrinsic::ctpop, MVT::nxv8i32, 20},
-    {Intrinsic::ctpop, MVT::nxv16i32, 20},
-    {Intrinsic::ctpop, MVT::v2i64, 21},
-    {Intrinsic::ctpop, MVT::v4i64, 21},
-    {Intrinsic::ctpop, MVT::v8i64, 21},
-    {Intrinsic::ctpop, MVT::v16i64, 21},
-    {Intrinsic::ctpop, MVT::nxv1i64, 21},
-    {Intrinsic::ctpop, MVT::nxv2i64, 21},
-    {Intrinsic::ctpop, MVT::nxv4i64, 21},
-    {Intrinsic::ctpop, MVT::nxv8i64, 21},
-    {Intrinsic::vp_ctpop, MVT::v2i8, 12},
-    {Intrinsic::vp_ctpop, MVT::v4i8, 12},
-    {Intrinsic::vp_ctpop, MVT::v8i8, 12},
-    {Intrinsic::vp_ctpop, MVT::v16i8, 12},
-    {Intrinsic::vp_ctpop, MVT::nxv1i8, 12},
-    {Intrinsic::vp_ctpop, MVT::nxv2i8, 12},
-    {Intrinsic::vp_ctpop, MVT::nxv4i8, 12},
-    {Intrinsic::vp_ctpop, MVT::nxv8i8, 12},
-    {Intrinsic::vp_ctpop, MVT::nxv16i8, 12},
-    {Intrinsic::vp_ctpop, MVT::v2i16, 19},
-    {Intrinsic::vp_ctpop, MVT::v4i16, 19},
-    {Intrinsic::vp_ctpop, MVT::v8i16, 19},
-    {Intrinsic::vp_ctpop, MVT::v16i16, 19},
-    {Intrinsic::vp_ctpop, MVT::nxv1i16, 19},
-    {Intrinsic::vp_ctpop, MVT::nxv2i16, 19},
-    {Intrinsic::vp_ctpop, MVT::nxv4i16, 19},
-    {Intrinsic::vp_ctpop, MVT::nxv8i16, 19},
-    {Intrinsic::vp_ctpop, MVT::nxv16i16, 19},
-    {Intrinsic::vp_ctpop, MVT::v2i32, 20},
-    {Intrinsic::vp_ctpop, MVT::v4i32, 20},
-    {Intrinsic::vp_ctpop, MVT::v8i32, 20},
-    {Intrinsic::vp_ctpop, MVT::v16i32, 20},
-    {Intrinsic::vp_ctpop, MVT::nxv1i32, 20},
-    {Intrinsic::vp_ctpop, MVT::nxv2i32, 20},
-    {Intrinsic::vp_ctpop, MVT::nxv4i32, 20},
-    {Intrinsic::vp_ctpop, MVT::nxv8i32, 20},
-    {Intrinsic::vp_ctpop, MVT::nxv16i32, 20},
-    {Intrinsic::vp_ctpop, MVT::v2i64, 21},
-    {Intrinsic::vp_ctpop, MVT::v4i64, 21},
-    {Intrinsic::vp_ctpop, MVT::v8i64, 21},
-    {Intrinsic::vp_ctpop, MVT::v16i64, 21},
-    {Intrinsic::vp_ctpop, MVT::nxv1i64, 21},
-    {Intrinsic::vp_ctpop, MVT::nxv2i64, 21},
-    {Intrinsic::vp_ctpop, MVT::nxv4i64, 21},
-    {Intrinsic::vp_ctpop, MVT::nxv8i64, 21},
-    {Intrinsic::vp_ctlz, MVT::v2i8, 19},
-    {Intrinsic::vp_ctlz, MVT::v4i8, 19},
-    {Intrinsic::vp_ctlz, MVT::v8i8, 19},
-    {Intrinsic::vp_ctlz, MVT::v16i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv1i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv2i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv4i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv8i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv16i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv32i8, 19},
-    {Intrinsic::vp_ctlz, MVT::nxv64i8, 19},
-    {Intrinsic::vp_ctlz, MVT::v2i16, 28},
-    {Intrinsic::vp_ctlz, MVT::v4i16, 28},
-    {Intrinsic::vp_ctlz, MVT::v8i16, 28},
-    {Intrinsic::vp_ctlz, MVT::v16i16, 28},
-    {Intrinsic::vp_ctlz, MVT::nxv1i16, 28},
-    {Intrinsic::vp_ctlz, MVT::nxv2i16, 28},
-    {Intrinsic::vp_ctlz, MVT::nxv4i16, 28},
-    {Intrinsic::vp_ctlz, MVT::nxv8i16, 28},
-    {Intrinsic::vp_ctlz, MVT::nxv16i16, 28},
-    {Intrinsic::vp_ctlz, MVT::nxv32i16, 28},
-    {Intrinsic::vp_ctlz, MVT::v2i32, 31},
-    {Intrinsic::vp_ctlz, MVT::v4i32, 31},
-    {Intrinsic::vp_ctlz, MVT::v8i32, 31},
-    {Intrinsic::vp_ctlz, MVT::v16i32, 31},
-    {Intrinsic::vp_ctlz, MVT::nxv1i32, 31},
-    {Intrinsic::vp_ctlz, MVT::nxv2i32, 31},
-    {Intrinsic::vp_ctlz, MVT::nxv4i32, 31},
-    {Intrinsic::vp_ctlz, MVT::nxv8i32, 31},
-    {Intrinsic::vp_ctlz, MVT::nxv16i32, 31},
-    {Intrinsic::vp_ctlz, MVT::v2i64, 35},
-    {Intrinsic::vp_ctlz, MVT::v4i64, 35},
-    {Intrinsic::vp_ctlz, MVT::v8i64, 35},
-    {Intrinsic::vp_ctlz, MVT::v16i64, 35},
-    {Intrinsic::vp_ctlz, MVT::nxv1i64, 35},
-    {Intrinsic::vp_ctlz, MVT::nxv2i64, 35},
-    {Intrinsic::vp_ctlz, MVT::nxv4i64, 35},
-    {Intrinsic::vp_ctlz, MVT::nxv8i64, 35},
-    {Intrinsic::vp_cttz, MVT::v2i8, 16},
-    {Intrinsic::vp_cttz, MVT::v4i8, 16},
-    {Intrinsic::vp_cttz, MVT::v8i8, 16},
-    {Intrinsic::vp_cttz, MVT::v16i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv1i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv2i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv4i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv8i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv16i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv32i8, 16},
-    {Intrinsic::vp_cttz, MVT::nxv64i8, 16},
-    {Intrinsic::vp_cttz, MVT::v2i16, 23},
-    {Intrinsic::vp_cttz, MVT::v4i16, 23},
-    {Intrinsic::vp_cttz, MVT::v8i16, 23},
-    {Intrinsic::vp_cttz, MVT::v16i16, 23},
-    {Intrinsic::vp_cttz, MVT::nxv1i16, 23},
-    {Intrinsic::vp_cttz, MVT::nxv2i16, 23},
-    {Intrinsic::vp_cttz, MVT::nxv4i16, 23},
-    {Intrinsic::vp_cttz, MVT::nxv8i16, 23},
-    {Intrinsic::vp_cttz, MVT::nxv16i16, 23},
-    {Intrinsic::vp_cttz, MVT::nxv32i16, 23},
-    {Intrinsic::vp_cttz, MVT::v2i32, 24},
-    {Intrinsic::vp_cttz, MVT::v4i32, 24},
-    {Intrinsic::vp_cttz, MVT::v8i32, 24},
-    {Intrinsic::vp_cttz, MVT::v16i32, 24},
-    {Intrinsic::vp_cttz, MVT::nxv1i32, 24},
-    {Intrinsic::vp_cttz, MVT::nxv2i32, 24},
-    {Intrinsic::vp_cttz, MVT::nxv4i32, 24},
-    {Intrinsic::vp_cttz, MVT::nxv8i32, 24},
-    {Intrinsic::vp_cttz, MVT::nxv16i32, 24},
-    {Intrinsic::vp_cttz, MVT::v2i64, 25},
-    {Intrinsic::vp_cttz, MVT::v4i64, 25},
-    {Intrinsic::vp_cttz, MVT::v8i64, 25},
-    {Intrinsic::vp_cttz, MVT::v16i64, 25},
-    {Intrinsic::vp_cttz, MVT::nxv1i64, 25},
-    {Intrinsic::vp_cttz, MVT::nxv2i64, 25},
-    {Intrinsic::vp_cttz, MVT::nxv4i64, 25},
-    {Intrinsic::vp_cttz, MVT::nxv8i64, 25},
+    {Intrinsic::floor, MVT::f32, 9},
+    {Intrinsic::floor, MVT::f64, 9},
+    {Intrinsic::ceil, MVT::f32, 9},
+    {Intrinsic::ceil, MVT::f64, 9},
+    {Intrinsic::trunc, MVT::f32, 7},
+    {Intrinsic::trunc, MVT::f64, 7},
+    {Intrinsic::round, MVT::f32, 9},
+    {Intrinsic::round, MVT::f64, 9},
+    {Intrinsic::roundeven, MVT::f32, 9},
+    {Intrinsic::roundeven, MVT::f64, 9},
+    {Intrinsic::rint, MVT::f32, 7},
+    {Intrinsic::rint, MVT::f64, 7},
+    {Intrinsic::lrint, MVT::i32, 1},
+    {Intrinsic::lrint, MVT::i64, 1},
+    {Intrinsic::llrint, MVT::i64, 1},
+    {Intrinsic::nearbyint, MVT::f32, 9},
+    {Intrinsic::nearbyint, MVT::f64, 9},
+    {Intrinsic::bswap, MVT::i16, 3},
+    {Intrinsic::bswap, MVT::i32, 12},
+    {Intrinsic::bswap, MVT::i64, 31},
+    {Intrinsic::vp_bswap, MVT::i16, 3},
+    {Intrinsic::vp_bswap, MVT::i32, 12},
+    {Intrinsic::vp_bswap, MVT::i64, 31},
+    {Intrinsic::vp_fshl, MVT::i8, 7},
+    {Intrinsic::vp_fshl, MVT::i16, 7},
+    {Intrinsic::vp_fshl, MVT::i32, 7},
+    {Intrinsic::vp_fshl, MVT::i64, 7},
+    {Intrinsic::vp_fshr, MVT::i8, 7},
+    {Intrinsic::vp_fshr, MVT::i16, 7},
+    {Intrinsic::vp_fshr, MVT::i32, 7},
+    {Intrinsic::vp_fshr, MVT::i64, 7},
+    {Intrinsic::bitreverse, MVT::i8, 17},
+    {Intrinsic::bitreverse, MVT::i16, 24},
+    {Intrinsic::bitreverse, MVT::i32, 33},
+    {Intrinsic::bitreverse, MVT::i64, 52},
+    {Intrinsic::vp_bitreverse, MVT::i8, 17},
+    {Intrinsic::vp_bitreverse, MVT::i16, 24},
+    {Intrinsic::vp_bitreverse, MVT::i32, 33},
+    {Intrinsic::vp_bitreverse, MVT::i64, 52},
+    {Intrinsic::ctpop, MVT::i8, 12},
+    {Intrinsic::ctpop, MVT::i16, 19},
+    {Intrinsic::ctpop, MVT::i32, 20},
+    {Intrinsic::ctpop, MVT::i64, 21},
+    {Intrinsic::vp_ctpop, MVT::i8, 12},
+    {Intrinsic::vp_ctpop, MVT::i16, 19},
+    {Intrinsic::vp_ctpop, MVT::i32, 20},
+    {Intrinsic::vp_ctpop, MVT::i64, 21},
+    {Intrinsic::vp_ctlz, MVT::i8, 19},
+    {Intrinsic::vp_ctlz, MVT::i16, 28},
+    {Intrinsic::vp_ctlz, MVT::i32, 31},
+    {Intrinsic::vp_ctlz, MVT::i64, 35},
+    {Intrinsic::vp_cttz, MVT::i8, 16},
+    {Intrinsic::vp_cttz, MVT::i16, 23},
+    {Intrinsic::vp_cttz, MVT::i32, 24},
+    {Intrinsic::vp_cttz, MVT::i64, 25},
 };
 
 static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
@@ -1251,10 +818,13 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
-    auto LT = getTypeLegalizationCost(RetTy);
-    if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
-                                            ICA.getID(), LT.second))
-      return LT.first * Entry->Cost;
+    if (auto LT = getTypeLegalizationCost(RetTy);
+        LT.second.isVector()) {
+      MVT EltTy = LT.second.getVectorElementType();
+      if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
+                                              ICA.getID(), EltTy))
+        return LT.first * Entry->Cost;
+    }
   }
 
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);


        


More information about the llvm-commits mailing list