[llvm] f60f7b4 - [InstCombine][X86] Add multiply-by-one handling for MULH/PMULHU/PMULHRS intrinsics
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 09:00:01 PDT 2024
Author: Simon Pilgrim
Date: 2024-07-03T16:59:52+01:00
New Revision: f60f7b47a92dbf2f3d994868d3cd43b86a18a76c
URL: https://github.com/llvm/llvm-project/commit/f60f7b47a92dbf2f3d994868d3cd43b86a18a76c
DIFF: https://github.com/llvm/llvm-project/commit/f60f7b47a92dbf2f3d994868d3cd43b86a18a76c.diff
LOG: [InstCombine][X86] Add multiply-by-one handling for MULH/PMULHU/PMULHRS intrinsics
MULH/PMULHU simplifies to ASHR/ZERO as they just become a SEXT/ZEXT sign-splat instruction
PMULHRS doesn't simplify as much so I've not attempted to fold it.
Added:
Modified:
llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 6d4734d477b3e..163584b3750d3 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -521,6 +521,16 @@ static Value *simplifyX86pmulh(IntrinsicInst &II,
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
return ConstantAggregateZero::get(ResTy);
+ // Multiply by one.
+ if (!IsRounding) {
+ if (match(Arg0, PatternMatch::m_One()))
+ return IsSigned ? Builder.CreateAShr(Arg1, 15)
+ : ConstantAggregateZero::get(ResTy);
+ if (match(Arg1, PatternMatch::m_One()))
+ return IsSigned ? Builder.CreateAShr(Arg0, 15)
+ : ConstantAggregateZero::get(ResTy);
+ }
+
// Constant folding.
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
index 699a3c9198e8a..185ab46deed89 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
@@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_128(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_128_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
@@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_256(
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_256_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
@@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_512(
-; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_512_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
index e970ae6080612..b18833f703a5f 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll
@@ -111,8 +111,7 @@ define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) {
define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
-; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
;
%1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %1
@@ -120,8 +119,7 @@ define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_128_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
-; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
;
%1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
ret <8 x i16> %1
@@ -129,8 +127,7 @@ define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256(
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
-; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
;
%1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <16 x i16> %1
@@ -138,8 +135,7 @@ define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_256_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
-; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
;
%1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
ret <16 x i16> %1
@@ -147,8 +143,7 @@ define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512(
-; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
-; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
;
%1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <32 x i16> %1
@@ -156,8 +151,7 @@ define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulhu_512_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
-; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+; CHECK-NEXT: ret <32 x i16> zeroinitializer
;
%1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
ret <32 x i16> %1
More information about the llvm-commits
mailing list