[llvm] 0a43ca7 - [AMDGPU] Fix missing `IsExact` flag when expanding vector binary operator (#86712)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 14:41:01 PDT 2024
Author: Shilei Tian
Date: 2024-03-27T17:40:58-04:00
New Revision: 0a43ca731b1faedd885f86153ecc570dde602ca3
URL: https://github.com/llvm/llvm-project/commit/0a43ca731b1faedd885f86153ecc570dde602ca3
DIFF: https://github.com/llvm/llvm-project/commit/0a43ca731b1faedd885f86153ecc570dde602ca3.diff
LOG: [AMDGPU] Fix missing `IsExact` flag when expanding vector binary operator (#86712)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index bddf3d958a1ae6..6e7d34f5adaa3f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1594,6 +1594,9 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
}
}
+ if (auto *NewEltI = dyn_cast<Instruction>(NewElt))
+ NewEltI->copyIRFlags(&I);
+
NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
}
} else {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index d9001656f308e1..2ad28b8dd6ecf5 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -10668,3 +10668,111 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x
store <2 x i64> %r, ptr addrspace(1) %out
ret void
}
+
+define <2 x i32> @v_sdiv_i32_exact(<2 x i32> %num) {
+; CHECK-LABEL: @v_sdiv_i32_exact(
+; CHECK: %1 = extractelement <2 x i32> %num, i64 0
+; CHECK-NEXT: %2 = sdiv exact i32 %1, 4096
+; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0
+; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1
+; CHECK-NEXT: %5 = sdiv exact i32 %4, 1024
+; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1
+; CHECK-NEXT: ret <2 x i32> %6
+;
+; GFX6-LABEL: v_sdiv_i32_exact:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_ashrrev_i32_e32 v0, 12, v0
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 10, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sdiv_i32_exact:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_ashrrev_i32_e32 v0, 12, v0
+; GFX9-NEXT: v_ashrrev_i32_e32 v1, 10, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %result = sdiv exact <2 x i32> %num, <i32 4096, i32 1024>
+ ret <2 x i32> %result
+}
+
+define <2 x i64> @v_sdiv_i64_exact(<2 x i64> %num) {
+; CHECK-LABEL: @v_sdiv_i64_exact(
+; CHECK: %1 = extractelement <2 x i64> %num, i64 0
+; CHECK-NEXT: %2 = sdiv exact i64 %1, 4096
+; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0
+; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1
+; CHECK-NEXT: %5 = sdiv exact i64 %4, 1024
+; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1
+; CHECK-NEXT: ret <2 x i64> %6
+;
+; GFX6-LABEL: v_sdiv_i64_exact:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_ashr_i64 v[0:1], v[0:1], 12
+; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 10
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sdiv_i64_exact:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_ashrrev_i64 v[0:1], 12, v[0:1]
+; GFX9-NEXT: v_ashrrev_i64 v[2:3], 10, v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %result = sdiv exact <2 x i64> %num, <i64 4096, i64 1024>
+ ret <2 x i64> %result
+}
+
+define <2 x i32> @v_udiv_i32_exact(<2 x i32> %num) {
+; CHECK-LABEL: @v_udiv_i32_exact(
+; CHECK: %1 = extractelement <2 x i32> %num, i64 0
+; CHECK-NEXT: %2 = udiv exact i32 %1, 4096
+; CHECK-NEXT: %3 = insertelement <2 x i32> poison, i32 %2, i64 0
+; CHECK-NEXT: %4 = extractelement <2 x i32> %num, i64 1
+; CHECK-NEXT: %5 = udiv exact i32 %4, 1024
+; CHECK-NEXT: %6 = insertelement <2 x i32> %3, i32 %5, i64 1
+; CHECK-NEXT: ret <2 x i32> %6
+;
+; GFX6-LABEL: v_udiv_i32_exact:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 12, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v1, 10, v1
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_udiv_i32_exact:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 12, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v1, 10, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %result = udiv exact <2 x i32> %num, <i32 4096, i32 1024>
+ ret <2 x i32> %result
+}
+
+define <2 x i64> @v_udiv_i64_exact(<2 x i64> %num) {
+; CHECK-LABEL: @v_udiv_i64_exact(
+; CHECK: %1 = extractelement <2 x i64> %num, i64 0
+; CHECK-NEXT: %2 = udiv exact i64 %1, 4096
+; CHECK-NEXT: %3 = insertelement <2 x i64> poison, i64 %2, i64 0
+; CHECK-NEXT: %4 = extractelement <2 x i64> %num, i64 1
+; CHECK-NEXT: %5 = udiv exact i64 %4, 1024
+; CHECK-NEXT: %6 = insertelement <2 x i64> %3, i64 %5, i64 1
+; CHECK-NEXT: ret <2 x i64> %6
+;
+; GFX6-LABEL: v_udiv_i64_exact:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
+; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 10
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_udiv_i64_exact:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b64 v[0:1], 12, v[0:1]
+; GFX9-NEXT: v_lshrrev_b64 v[2:3], 10, v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %result = udiv exact <2 x i64> %num, <i64 4096, i64 1024>
+ ret <2 x i64> %result
+}
More information about the llvm-commits mailing list