[llvm] fe05a0a - [SDAG] avoid udiv/urem transform for vector/scalar type mismatches

Tue Nov 15 08:01:36 PST 2022

Author: Sanjay Patel
Date: 2022-11-15T11:01:18-05:00
New Revision: fe05a0a3ddbf7fb54af9536331d39b138763e17a

URL: https://github.com/llvm/llvm-project/commit/fe05a0a3ddbf7fb54af9536331d39b138763e17a
DIFF: https://github.com/llvm/llvm-project/commit/fe05a0a3ddbf7fb54af9536331d39b138763e17a.diff

LOG: [SDAG] avoid udiv/urem transform for vector/scalar type mismatches

This fixes the crash reported in issue #58994.
I don't know anything about VE, so I don't know if the output
is as expected or even correct.

Added: 
    llvm/test/CodeGen/VE/Vector/vec_divrem.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0112a401a8ea6..73be3b5f78fb6 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4477,10 +4477,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
 
   // fold (udiv X, -1) -> select(X == -1, 1, 0)
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
-  if (N1C && N1C->isAllOnes())
+  if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                          DAG.getConstant(1, DL, VT),
                          DAG.getConstant(0, DL, VT));
+  }
 
   if (SDValue V = simplifyDivRem(N, DAG))
     return V;
@@ -4583,7 +4584,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
 
   // fold (urem X, -1) -> select(FX == -1, 0, FX)
   // Freeze the numerator to avoid a miscompile with an undefined value.
-  if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) {
+  if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
+      CCVT.isVector() == VT.isVector()) {
     SDValue F0 = DAG.getFreeze(N0);
     SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
     return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);

diff  --git a/llvm/test/CodeGen/VE/Vector/vec_divrem.ll b/llvm/test/CodeGen/VE/Vector/vec_divrem.ll
new file mode 100644
index 0000000000000..e967225fd31ed
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_divrem.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+; This used to assert because VE specifies that all setcc
+; nodes (even with vector operands) return a scalar value.
+
+define <4 x i8> @udiv_by_minus_one(<4 x i8> %x) {
+; CHECK-LABEL: udiv_by_minus_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    and %s1, %s1, (56)0
+; CHECK-NEXT:    and %s2, %s2, (56)0
+; CHECK-NEXT:    and %s3, %s3, (56)0
+; CHECK-NEXT:    divu.w %s3, %s3, (56)0
+; CHECK-NEXT:    divu.w %s2, %s2, (56)0
+; CHECK-NEXT:    divu.w %s1, %s1, (56)0
+; CHECK-NEXT:    divu.w %s0, %s0, (56)0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = udiv <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @urem_by_minus_one(<4 x i8> %x) {
+; CHECK-LABEL: urem_by_minus_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    and %s1, %s1, (56)0
+; CHECK-NEXT:    and %s2, %s2, (56)0
+; CHECK-NEXT:    and %s3, %s3, (56)0
+; CHECK-NEXT:    divu.w %s4, %s3, (56)0
+; CHECK-NEXT:    muls.w.sx %s4, %s4, (56)0
+; CHECK-NEXT:    subs.w.sx %s3, %s3, %s4
+; CHECK-NEXT:    divu.w %s4, %s2, (56)0
+; CHECK-NEXT:    muls.w.sx %s4, %s4, (56)0
+; CHECK-NEXT:    subs.w.sx %s2, %s2, %s4
+; CHECK-NEXT:    divu.w %s4, %s1, (56)0
+; CHECK-NEXT:    muls.w.sx %s4, %s4, (56)0
+; CHECK-NEXT:    subs.w.sx %s1, %s1, %s4
+; CHECK-NEXT:    divu.w %s4, %s0, (56)0
+; CHECK-NEXT:    muls.w.sx %s4, %s4, (56)0
+; CHECK-NEXT:    subs.w.sx %s0, %s0, %s4
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = urem <4 x i8> %x, <i8 255, i8 255, i8 255, i8 255>
+  ret <4 x i8> %r
+}