[llvm] e52e1da - [SDAG] freeze operand when expanding urem
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri May 13 07:59:51 PDT 2022
Author: Sanjay Patel
Date: 2022-05-13T10:55:14-04:00
New Revision: e52e1dab2a25d77a135b891378ad98ae339781ed
URL: https://github.com/llvm/llvm-project/commit/e52e1dab2a25d77a135b891378ad98ae339781ed
DIFF: https://github.com/llvm/llvm-project/commit/e52e1dab2a25d77a135b891378ad98ae339781ed.diff
LOG: [SDAG] freeze operand when expanding urem
This is a potential miscompile as discussed in issue #55291.
The related IR transform was patched with:
d428f09b2c9d49f6a32
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/combine-urem.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5102c1b3c37a..f56012dc90cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4532,10 +4532,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
- // fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->isAllOnes())
- return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
- DAG.getConstant(0, DL, VT), N0);
+ // fold (urem X, -1) -> select(FX == -1, 0, FX)
+ // Freeze the numerator to avoid a miscompile with an undefined value.
+ if (!isSigned && N1C && N1C->isAllOnes()) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
+ }
if (SDValue V = simplifyDivRem(N, DAG))
return V;
diff --git a/llvm/test/CodeGen/X86/combine-urem.ll b/llvm/test/CodeGen/X86/combine-urem.ll
index 5c13315a250c..def64fe194cd 100644
--- a/llvm/test/CodeGen/X86/combine-urem.ll
+++ b/llvm/test/CodeGen/X86/combine-urem.ll
@@ -63,12 +63,16 @@ define <4 x i32> @combine_vec_urem_by_negone(<4 x i32> %x) {
define <4 x i32> @combine_vec_urem_undef_by_negone(<4 x i32> %in) {
; SSE-LABEL: combine_vec_urem_undef_by_negone:
; SSE: # %bb.0:
-; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pandn %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_urem_undef_by_negone:
; AVX: # %bb.0:
-; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpandn %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%x = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> undef, i32 0)
%y = urem <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
More information about the llvm-commits
mailing list