[llvm] r359476 - [DAG] Refactor DAGCombiner::ReassociateOps

Bjorn Pettersson via llvm-commits llvm-commits@lists.llvm.org
Mon Apr 29 10:50:10 PDT 2019


Author: bjope
Date: Mon Apr 29 10:50:10 2019
New Revision: 359476

URL: http://llvm.org/viewvc/llvm-project?rev=359476&view=rev
Log:
[DAG] Refactor DAGCombiner::ReassociateOps

Summary:
Extract the logic for doing reassociations
from DAGCombiner::reassociateOps into a helper
function DAGCombiner::reassociateOpsCommutative,
and use that helper to trigger reassociation
on the original operand order, or the commuted
operand order.

Codegen is not identical since the operand order will
be different when doing the reassociations for the
commuted case. That causes some unfortunate churn in
some test cases. Apart from that this should be NFC.

Reviewers: spatel, craig.topper, tstellar

Reviewed By: spatel

Subscribers: dmgreen, dschuff, jvesely, nhaehnle, javed.absar, sbc100, jgravelle-google, hiraditya, aheejin, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61199

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
    llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll
    llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
    llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
    llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll
    llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
    llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll
    llvm/trunk/test/CodeGen/ARM/load-combine.ll
    llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll
    llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll
    llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll
    llvm/trunk/test/CodeGen/X86/add-ext.ll
    llvm/trunk/test/CodeGen/X86/combine-multiplies.ll
    llvm/trunk/test/CodeGen/X86/load-combine.ll
    llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
    llvm/trunk/test/CodeGen/X86/merge_store.ll
    llvm/trunk/test/CodeGen/X86/sad.ll
    llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 29 10:50:10 2019
@@ -458,7 +458,9 @@ namespace {
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
-    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+                                      SDValue N1);
+    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
 
     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -1000,53 +1002,50 @@ static bool isAnyConstantBuildVector(SDV
          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
-                                    SDValue N1, SDNodeFlags Flags) {
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+                                               SDValue N0, SDValue N1) {
+  EVT VT = N0.getValueType();
+
+  if (N0.getOpcode() != Opc)
+    return SDValue();
+
   // Don't reassociate reductions.
-  if (Flags.hasVectorReduction())
+  if (N0->getFlags().hasVectorReduction())
     return SDValue();
 
-  EVT VT = N0.getValueType();
-  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
-    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
-      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
-        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
-        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
-          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
-        return SDValue();
-      }
-      if (N0.hasOneUse()) {
-        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
-        // use
-        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
-        if (!OpNode.getNode())
-          return SDValue();
-        AddToWorklist(OpNode.getNode());
-        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
-      }
+  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+      return SDValue();
     }
-  }
-
-  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
-    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
-      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
-        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
-        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
-          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+    if (N0.hasOneUse()) {
+      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+      //              iff (op x, c1) has one use
+      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+      if (!OpNode.getNode())
         return SDValue();
-      }
-      if (N1.hasOneUse()) {
-        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
-        // use
-        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
-        if (!OpNode.getNode())
-          return SDValue();
-        AddToWorklist(OpNode.getNode());
-        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
-      }
+      AddToWorklist(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     }
   }
+  return SDValue();
+}
 
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+                                    SDValue N1, SDNodeFlags Flags) {
+  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+  // Don't reassociate reductions.
+  if (Flags.hasVectorReduction())
+    return SDValue();
+  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+    return Combined;
+  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+    return Combined;
   return SDValue();
 }
 
@@ -2193,7 +2192,7 @@ SDValue DAGCombiner::visitADDLike(SDNode
     return NewSel;
 
   // reassociate add
-  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
     return RADD;
 
   // fold ((0-A) + B) -> B-A
@@ -3275,7 +3274,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N)
                                      N0.getOperand(1), N1));
 
   // reassociate mul
-  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
     return RMUL;
 
   return SDValue();
@@ -4799,7 +4798,7 @@ SDValue DAGCombiner::visitAND(SDNode *N)
     return NewSel;
 
   // reassociate and
-  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
     return RAND;
 
   // Try to convert a constant mask AND into a shuffle clear mask.
@@ -5525,7 +5524,7 @@ SDValue DAGCombiner::visitOR(SDNode *N)
     return BSwap;
 
   // reassociate or
-  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
     return ROR;
 
   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -6412,7 +6411,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N)
     return NewSel;
 
   // reassociate xor
-  if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+  if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
     return RXOR;
 
   // fold !(x cc y) -> (x !cc y)

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll Mon Apr 29 10:50:10 2019
@@ -19,8 +19,8 @@ define zeroext i8 @fullGtU(i32 %i1, i32
 ; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
 ; CHECK-NEXT: b.ne
 ; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
+; CHECK: add [[BLOCKBASE1:x[0-9]+]], [[I1]], [[BLOCKBASE]]
+; CHECK-NEXT: add [[BLOCKBASE2:x[0-9]+]], [[I2]], [[BLOCKBASE]]
 ; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
 ; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
 ; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]

Modified: llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll Mon Apr 29 10:50:10 2019
@@ -184,14 +184,14 @@ define amdgpu_ps void @ps_mesa_v2i16(<2
 ; VI: s_and_b32 s1, s0, 0xffff0000
 ; VI: s_add_i32 s0, s0, 1
 ; VI: s_and_b32 s0, s0, 0xffff
-; VI: s_or_b32 s0, s0, s1
+; VI: s_or_b32 s0, s1, s0
 ; VI: s_add_i32 s0, s0, 0x10000
 ; VI: v_mov_b32_e32 v0, s0
 
 ; SI: s_lshl_b32 s1, s1, 16
 ; SI: s_add_i32 s0, s0, 1
 ; SI: s_and_b32 s0, s0, 0xffff
-; SI: s_or_b32 s0, s0, s1
+; SI: s_or_b32 s0, s1, s0
 ; SI: s_add_i32 s0, s0, 0x10000
 define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
   %add = add <2 x i16> %arg0, <i16 1, i16 1>

Modified: llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll Mon Apr 29 10:50:10 2019
@@ -289,18 +289,18 @@ define amdgpu_kernel void @load_v4i8_to_
 ; SI-NEXT:    v_cvt_f32_ubyte2_e32 v2, v1
 ; SI-NEXT:    v_cvt_f32_ubyte0_e32 v0, v1
 ; SI-NEXT:    v_cvt_f32_ubyte1_e32 v1, v6
-; SI-NEXT:    v_add_i32_e32 v4, vcc, 9, v4
 ; SI-NEXT:    v_and_b32_e32 v7, s12, v7
+; SI-NEXT:    v_add_i32_e32 v4, vcc, 9, v4
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; SI-NEXT:    s_waitcnt expcnt(0)
-; SI-NEXT:    v_or_b32_e32 v1, v7, v6
+; SI-NEXT:    v_or_b32_e32 v0, v6, v7
 ; SI-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
-; SI-NEXT:    v_and_b32_e32 v0, s12, v4
-; SI-NEXT:    v_or_b32_e32 v0, v0, v5
-; SI-NEXT:    v_add_i32_e32 v1, vcc, 0x900, v1
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; SI-NEXT:    v_and_b32_e32 v1, s12, v4
+; SI-NEXT:    v_add_i32_e32 v0, vcc, 0x900, v0
+; SI-NEXT:    v_or_b32_e32 v1, v5, v1
+; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_or_b32_e32 v0, v1, v0
 ; SI-NEXT:    v_add_i32_e32 v0, vcc, 0x9000000, v0
 ; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
@@ -335,8 +335,8 @@ define amdgpu_kernel void @load_v4i8_to_
 ; VI-NEXT:    v_add_u16_e32 v9, 9, v5
 ; VI-NEXT:    v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-NEXT:    v_lshlrev_b16_e32 v1, 8, v7
-; VI-NEXT:    v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; VI-NEXT:    v_add_u16_e32 v0, s8, v0
 ; VI-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NEXT:    v_or_b32_e32 v0, v0, v1

Modified: llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shl_add_constant.ll Mon Apr 29 10:50:10 2019
@@ -71,7 +71,7 @@ define amdgpu_kernel void @test_add_shl_
 ; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
 ; SI: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
-; SI: s_add_i32 [[TMP:s[0-9]+]], s[[Y]], [[SHL3]]
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], s[[Y]]
 ; SI: s_addk_i32 [[TMP]], 0x3d8
 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
 ; SI: buffer_store_dword [[VRESULT]]

Modified: llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll Mon Apr 29 10:50:10 2019
@@ -216,7 +216,7 @@ define amdgpu_kernel void @widen_v2i8_co
 ; SI-NEXT:    s_add_i32 s0, s0, 12
 ; SI-NEXT:    s_or_b32 s0, s0, 4
 ; SI-NEXT:    s_and_b32 s0, s0, 0xff
-; SI-NEXT:    s_or_b32 s0, s0, s1
+; SI-NEXT:    s_or_b32 s0, s1, s0
 ; SI-NEXT:    s_addk_i32 s0, 0x2c00
 ; SI-NEXT:    s_or_b32 s0, s0, 0x300
 ; SI-NEXT:    v_mov_b32_e32 v0, s0

Modified: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/and-load-combine.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll Mon Apr 29 10:50:10 2019
@@ -414,35 +414,35 @@ entry:
 define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_short_int:
 ; ARM:       @ %bb.0: @ %entry
-; ARM-NEXT:    ldrb r0, [r0]
 ; ARM-NEXT:    ldrb r1, [r1]
-; ARM-NEXT:    and r0, r1, r0
+; ARM-NEXT:    ldrb r0, [r0]
+; ARM-NEXT:    and r0, r0, r1
 ; ARM-NEXT:    clz r0, r0
 ; ARM-NEXT:    lsr r0, r0, #5
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_short_int:
 ; ARMEB:       @ %bb.0: @ %entry
-; ARMEB-NEXT:    ldrb r0, [r0, #1]
 ; ARMEB-NEXT:    ldrb r1, [r1, #3]
-; ARMEB-NEXT:    and r0, r1, r0
+; ARMEB-NEXT:    ldrb r0, [r0, #1]
+; ARMEB-NEXT:    and r0, r0, r1
 ; ARMEB-NEXT:    clz r0, r0
 ; ARMEB-NEXT:    lsr r0, r0, #5
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_short_int:
 ; THUMB1:       @ %bb.0: @ %entry
-; THUMB1-NEXT:    ldrb r0, [r0]
 ; THUMB1-NEXT:    ldrb r1, [r1]
-; THUMB1-NEXT:    ands r1, r0
-; THUMB1-NEXT:    rsbs r0, r1, #0
-; THUMB1-NEXT:    adcs r0, r1
+; THUMB1-NEXT:    ldrb r2, [r0]
+; THUMB1-NEXT:    ands r2, r1
+; THUMB1-NEXT:    rsbs r0, r2, #0
+; THUMB1-NEXT:    adcs r0, r2
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_and8_short_int:
 ; THUMB2:       @ %bb.0: @ %entry
-; THUMB2-NEXT:    ldrb r0, [r0]
 ; THUMB2-NEXT:    ldrb r1, [r1]
+; THUMB2-NEXT:    ldrb r0, [r0]
 ; THUMB2-NEXT:    ands r0, r1
 ; THUMB2-NEXT:    clz r0, r0
 ; THUMB2-NEXT:    lsrs r0, r0, #5
@@ -846,7 +846,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8
 ; ARM-LABEL: test6:
 ; ARM:       @ %bb.0: @ %entry
 ; ARM-NEXT:    ldrb r0, [r0]
-; ARM-NEXT:    and r0, r0, r1
+; ARM-NEXT:    and r0, r1, r0
 ; ARM-NEXT:    uxtb r1, r2
 ; ARM-NEXT:    sub r0, r0, r1
 ; ARM-NEXT:    clz r0, r0
@@ -856,7 +856,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8
 ; ARMEB-LABEL: test6:
 ; ARMEB:       @ %bb.0: @ %entry
 ; ARMEB-NEXT:    ldrb r0, [r0]
-; ARMEB-NEXT:    and r0, r0, r1
+; ARMEB-NEXT:    and r0, r1, r0
 ; ARMEB-NEXT:    uxtb r1, r2
 ; ARMEB-NEXT:    sub r0, r0, r1
 ; ARMEB-NEXT:    clz r0, r0
@@ -893,7 +893,7 @@ define arm_aapcscc i1 @test7(i16* %x, i1
 ; ARM-LABEL: test7:
 ; ARM:       @ %bb.0: @ %entry
 ; ARM-NEXT:    ldrb r0, [r0]
-; ARM-NEXT:    and r0, r0, r1
+; ARM-NEXT:    and r0, r1, r0
 ; ARM-NEXT:    uxtb r1, r2
 ; ARM-NEXT:    sub r0, r0, r1
 ; ARM-NEXT:    clz r0, r0
@@ -903,7 +903,7 @@ define arm_aapcscc i1 @test7(i16* %x, i1
 ; ARMEB-LABEL: test7:
 ; ARMEB:       @ %bb.0: @ %entry
 ; ARMEB-NEXT:    ldrb r0, [r0, #1]
-; ARMEB-NEXT:    and r0, r0, r1
+; ARMEB-NEXT:    and r0, r1, r0
 ; ARMEB-NEXT:    uxtb r1, r2
 ; ARMEB-NEXT:    sub r0, r0, r1
 ; ARMEB-NEXT:    clz r0, r0
@@ -1550,34 +1550,34 @@ define arm_aapcscc i64 @test26(i64* noca
   ret i64 %and
 }
 
+define void @test27(i32* nocapture %ptr) {
 ; ARM-LABEL: test27:
-; ARM:       @ %bb.0:
+; ARM:       @ %bb.0: @ %entry
 ; ARM-NEXT:    ldrb r1, [r0, #1]
 ; ARM-NEXT:    lsl r1, r1, #16
 ; ARM-NEXT:    str r1, [r0]
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: test27:
-; ARMEB:     @ %bb.0:
-; ARMEB-NEXT:  ldrb r1, [r0, #2]
-; ARMEB-NEXT:  lsl r1, r1, #16
-; ARMEB-NEXT:  str r1, [r0]
-; ARMEB-NEXT:  bx lr
+; ARMEB:       @ %bb.0: @ %entry
+; ARMEB-NEXT:    ldrb r1, [r0, #2]
+; ARMEB-NEXT:    lsl r1, r1, #16
+; ARMEB-NEXT:    str r1, [r0]
+; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: test27:
-; THUMB1:     @ %bb.0:
-; THUMB1-NEXT:  ldrb r1, [r0, #1]
-; THUMB1-NEXT:  lsls r1, r1, #16
-; THUMB1-NEXT:  str r1, [r0]
-; THUMB1-NEXT:  bx lr
+; THUMB1:       @ %bb.0: @ %entry
+; THUMB1-NEXT:    ldrb r1, [r0, #1]
+; THUMB1-NEXT:    lsls r1, r1, #16
+; THUMB1-NEXT:    str r1, [r0]
+; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: test27:
-; THUMB2:       @ %bb.0:
+; THUMB2:       @ %bb.0: @ %entry
 ; THUMB2-NEXT:    ldrb r1, [r0, #1]
 ; THUMB2-NEXT:    lsls r1, r1, #16
 ; THUMB2-NEXT:    str r1, [r0]
 ; THUMB2-NEXT:    bx lr
-define void @test27(i32* nocapture %ptr) {
 entry:
   %0 = load i32, i32* %ptr, align 4
   %and = and i32 %0, 65280

Modified: llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/load-combine-big-endian.ll Mon Apr 29 10:50:10 2019
@@ -528,7 +528,7 @@ define i32 @load_i32_by_i8_base_offset_i
 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK: add r0, r0, r1
+; CHECK: add r0, r1, r0
 ; CHECK-NEXT: mov r1, #65280
 ; CHECK-NEXT: mov r2, #16711680
 ; CHECK-NEXT: ldr r0, [r0, #13]
@@ -540,7 +540,7 @@ define i32 @load_i32_by_i8_base_offset_i
 ; CHECK-NEXT: mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6: add r0, r1, r0
 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
 ; CHECK-ARMv6-NEXT: rev r0, r0
 ; CHECK-ARMv6-NEXT: bx  lr

Modified: llvm/trunk/test/CodeGen/ARM/load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/load-combine.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/load-combine.ll Mon Apr 29 10:50:10 2019
@@ -479,12 +479,12 @@ define i32 @load_i32_by_i8_base_offset_i
 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK: add r0, r0, r1
+; CHECK: add r0, r1, r0
 ; CHECK-NEXT: ldr r0, [r0, #13]
 ; CHECK-NEXT: mov pc, lr
 ;
 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
-; CHECK-ARMv6: add r0, r0, r1
+; CHECK-ARMv6: add r0, r1, r0
 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
 ; CHECK-ARMv6-NEXT: bx  lr
   %tmp = add nuw nsw i32 %i, 4

Modified: llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll Mon Apr 29 10:50:10 2019
@@ -13,7 +13,7 @@ define void @f1(<2 x i64> %a0) {
 ; CHECK-NEXT:    vn %v0, %v0, %v0
 ; CHECK-NEXT:    vno %v2, %v2, %v2
 ; CHECK-NEXT:    vceqg %v0, %v0, %v1
-; CHECK-NEXT:    vx %v0, %v2, %v0
+; CHECK-NEXT:    vx %v0, %v0, %v2
 ; CHECK-NEXT:    vnc %v0, %v2, %v0
 ; CHECK-NEXT:    vlgvf %r0, %v0, 1
 ; CHECK-NEXT:    tmll %r0, 1

Modified: llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/constant-hoisting.ll Mon Apr 29 10:50:10 2019
@@ -17,16 +17,16 @@ define i32 @test_values(i32 %a, i32 %b)
 ; CHECK-V6M-NEXT:    adds r0, r1, r0
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:  .LBB0_5:
-; CHECK-V6M-NEXT:    adds r0, r1, r0
+; CHECK-V6M-NEXT:    adds r0, r0, r1
 ; CHECK-V6M-NEXT:    adds r0, r0, #4
 ; CHECK-V6M-NEXT:  .LBB0_6:
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:  .LBB0_7:
-; CHECK-V6M-NEXT:    adds r0, r1, r0
+; CHECK-V6M-NEXT:    adds r0, r0, r1
 ; CHECK-V6M-NEXT:    adds r0, r0, #1
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:  .LBB0_8:
-; CHECK-V6M-NEXT:    adds r0, r1, r0
+; CHECK-V6M-NEXT:    adds r0, r0, r1
 ; CHECK-V6M-NEXT:    adds r0, r0, #2
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:    .p2align 2

Modified: llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll (original)
+++ llvm/trunk/test/CodeGen/WebAssembly/address-offsets.ll Mon Apr 29 10:50:10 2019
@@ -165,10 +165,10 @@ define i32 @load_test9() {
 ; NON-PIC-NEXT:  i32.load  $push4=, 0($pop3){{$}}
 ; NON-PIC-NEXT:  return    $pop4{{$}}
 
-; PIC-NEXT:   global.get $push2=, g@GOT{{$}}
 ; PIC-NEXT:   i32.const $push0=, 2{{$}}
 ; PIC-NEXT:   i32.shl   $push1=, $0, $pop0{{$}}
-; PIC-NEXT:   i32.add   $push3=, $pop2, $pop1{{$}}
+; PIC-NEXT:   global.get $push2=, g@GOT{{$}}
+; PIC-NEXT:   i32.add   $push3=, $pop1, $pop2{{$}}
 ; PIC-NEXT:   i32.const $push4=, -40{{$}}
 ; PIC-NEXT:   i32.add   $push5=, $pop3, $pop4{{$}}
 ; PIC-NEXT:   i32.load  $push6=, 0($pop5){{$}}
@@ -206,7 +206,7 @@ define i32 @load_test11_noinbounds(i32*
 ; CHECK-NEXT: .functype load_test12 (i32, i32) -> (i32){{$}}
 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
 ; CHECK-NEXT: i32.shl   $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add   $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add   $push2=, $pop1, $0{{$}}
 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
 ; CHECK-NEXT: i32.add   $push4=, $pop2, $pop3{{$}}
 ; CHECK-NEXT: i32.load  $push5=, 0($pop4){{$}}
@@ -222,7 +222,7 @@ define i32 @load_test12(i32* %p, i32 %n)
 ; CHECK-NEXT: .functype load_test13 (i32, i32) -> (i32){{$}}
 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
 ; CHECK-NEXT: i32.shl   $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add   $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add   $push2=, $pop1, $0{{$}}
 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
 ; CHECK-NEXT: i32.add   $push4=, $pop2, $pop3{{$}}
 ; CHECK-NEXT: i32.load  $push5=, 0($pop4){{$}}
@@ -284,7 +284,7 @@ define i32 @load_test16(i32* %p, i32 %n)
 ; CHECK-NEXT: .functype load_test17 (i32, i32) -> (i32){{$}}
 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
 ; CHECK-NEXT: i32.shl   $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add   $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add   $push2=, $pop1, $0{{$}}
 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
 ; CHECK-NEXT: i32.add   $push4=, $pop2, $pop3{{$}}
 ; CHECK-NEXT: i32.load  $push5=, 0($pop4){{$}}
@@ -314,7 +314,7 @@ define i32 @load_test18(i32* %p, i32 %n)
 ; CHECK-NEXT: .functype load_test19 (i32, i32) -> (i32){{$}}
 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
 ; CHECK-NEXT: i32.shl   $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add   $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add   $push2=, $pop1, $0{{$}}
 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
 ; CHECK-NEXT: i32.add   $push4=, $pop2, $pop3{{$}}
 ; CHECK-NEXT: i32.load  $push5=, 0($pop4){{$}}
@@ -342,7 +342,7 @@ define i32 @load_test20(i32* %p) {
 ; CHECK-NEXT: .functype load_test21 (i32, i32) -> (i32){{$}}
 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
 ; CHECK-NEXT: i32.shl   $push1=, $1, $pop0{{$}}
-; CHECK-NEXT: i32.add   $push2=, $0, $pop1{{$}}
+; CHECK-NEXT: i32.add   $push2=, $pop1, $0{{$}}
 ; CHECK-NEXT: i32.const $push3=, -40{{$}}
 ; CHECK-NEXT: i32.add   $push4=, $pop2, $pop3{{$}}
 ; CHECK-NEXT: i32.load  $push5=, 0($pop4){{$}}
@@ -501,10 +501,10 @@ define void @store_test9(i32 %i) {
 ; NON-PIC-NEXT:  i32.const $push2=, g-40{{$}}
 ; NON-PIC-NEXT:  i32.add   $push3=, $pop1, $pop2{{$}}
 ; NON-PIC-NEXT:  i32.store 0($pop3), $1{{$}}
-; PIC-NEXT: global.get $push2=, g@GOT{{$}}
 ; PIC-NEXT: i32.const  $push0=, 2{{$}}
 ; PIC-NEXT: i32.shl    $push1=, $0, $pop0{{$}}
-; PIC-NEXT: i32.add    $push3=, $pop2, $pop1{{$}}
+; PIC-NEXT: global.get $push2=, g@GOT{{$}}
+; PIC-NEXT: i32.add    $push3=, $pop1, $pop2{{$}}
 ; PIC-NEXT: i32.const  $push4=, -40{{$}}
 ; PIC-NEXT: i32.add    $push5=, $pop3, $pop4{{$}}
 ; PIC-NEXT: i32.store  0($pop5), $1{{$}}
@@ -542,7 +542,7 @@ define void @store_test11_noinbounds(i32
 ; CHECK-NEXT: .functype store_test12 (i32, i32, i32) -> (){{$}}
 ; NON-PIC-NEXT:  i32.const $push0=, 2{{$}}
 ; NON-PIC-NEXT:  i32.shl   $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT:  i32.add   $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT:  i32.add   $push2=, $pop1, $0{{$}}
 ; NON-PIC-NEXT:  i32.const $push3=, 40{{$}}
 ; NON-PIC-NEXT:  i32.add   $push4=, $pop2, $pop3{{$}}
 ; NON-PIC-NEXT:  i32.store 0($pop4), $2{{$}}
@@ -558,7 +558,7 @@ define void @store_test12(i32* %p, i32 %
 ; CHECK-NEXT: .functype store_test13 (i32, i32, i32) -> (){{$}}
 ; NON-PIC-NEXT:  i32.const $push0=, 2{{$}}
 ; NON-PIC-NEXT:  i32.shl   $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT:  i32.add   $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT:  i32.add   $push2=, $pop1, $0{{$}}
 ; NON-PIC-NEXT:  i32.const $push3=, 40{{$}}
 ; NON-PIC-NEXT:  i32.add   $push4=, $pop2, $pop3{{$}}
 ; NON-PIC-NEXT:  i32.store 0($pop4), $2{{$}}
@@ -620,7 +620,7 @@ define void @store_test16(i32* %p, i32 %
 ; CHECK-NEXT: .functype store_test17 (i32, i32, i32) -> (){{$}}
 ; NON-PIC-NEXT:  i32.const $push0=, 2{{$}}
 ; NON-PIC-NEXT:  i32.shl   $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT:  i32.add   $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT:  i32.add   $push2=, $pop1, $0{{$}}
 ; NON-PIC-NEXT:  i32.const $push3=, 40{{$}}
 ; NON-PIC-NEXT:  i32.add   $push4=, $pop2, $pop3{{$}}
 ; NON-PIC-NEXT:  i32.store 0($pop4), $2{{$}}
@@ -650,7 +650,7 @@ define void @store_test18(i32* %p, i32 %
 ; CHECK-NEXT: .functype store_test19 (i32, i32, i32) -> (){{$}}
 ; NON-PIC-NEXT:  i32.const $push0=, 2{{$}}
 ; NON-PIC-NEXT:  i32.shl   $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT:  i32.add   $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT:  i32.add   $push2=, $pop1, $0{{$}}
 ; NON-PIC-NEXT:  i32.const $push3=, 40{{$}}
 ; NON-PIC-NEXT:  i32.add   $push4=, $pop2, $pop3{{$}}
 ; NON-PIC-NEXT:  i32.store 0($pop4), $2{{$}}
@@ -678,7 +678,7 @@ define void @store_test20(i32* %p, i32 %
 ; CHECK-NEXT: .functype store_test21 (i32, i32, i32) -> (){{$}}
 ; NON-PIC-NEXT:  i32.const $push0=, 2{{$}}
 ; NON-PIC-NEXT:  i32.shl   $push1=, $1, $pop0{{$}}
-; NON-PIC-NEXT:  i32.add   $push2=, $0, $pop1{{$}}
+; NON-PIC-NEXT:  i32.add   $push2=, $pop1, $0{{$}}
 ; NON-PIC-NEXT:  i32.const $push3=, -40{{$}}
 ; NON-PIC-NEXT:  i32.add   $push4=, $pop2, $pop3{{$}}
 ; NON-PIC-NEXT:  i32.store 0($pop4), $2{{$}}

Modified: llvm/trunk/test/CodeGen/X86/add-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/add-ext.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/add-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/add-ext.ll Mon Apr 29 10:50:10 2019
@@ -26,7 +26,7 @@ define i64 @add_nsw_sext_add(i32 %i, i64
 ; CHECK-LABEL: add_nsw_sext_add:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movslq %edi, %rax
-; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
+; CHECK-NEXT:    leaq 5(%rax,%rsi), %rax
 ; CHECK-NEXT:    retq
 
   %add = add nsw i32 %i, 5
@@ -73,7 +73,7 @@ define i8* @gep8(i32 %i, i8* %x) {
 ; CHECK-LABEL: gep8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movslq %edi, %rax
-; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
+; CHECK-NEXT:    leaq 5(%rax,%rsi), %rax
 ; CHECK-NEXT:    retq
 
   %add = add nsw i32 %i, 5
@@ -128,7 +128,7 @@ define i128* @gep128(i32 %i, i128* %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movslq %edi, %rax
 ; CHECK-NEXT:    shlq $4, %rax
-; CHECK-NEXT:    leaq 80(%rsi,%rax), %rax
+; CHECK-NEXT:    leaq 80(%rax,%rsi), %rax
 ; CHECK-NEXT:    retq
 
   %add = add nsw i32 %i, 5
@@ -169,12 +169,13 @@ define void @PR20134(i32* %a, i32 %i) {
 
 ; The same as @PR20134 but sign extension is replaced with zero extension
 define void @PR20134_zext(i32* %a, i32 %i) {
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
-; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
-; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
-; CHECK-NEXT: retq
+; CHECK-LABEL: PR20134_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl 4(%rdi,%rax,4), %ecx
+; CHECK-NEXT:    addl 8(%rdi,%rax,4), %ecx
+; CHECK-NEXT:    movl %ecx, (%rdi,%rax,4)
+; CHECK-NEXT:    retq
 
   %add1 = add nuw i32 %i, 1
   %idx1 = zext i32 %add1 to i64

Modified: llvm/trunk/test/CodeGen/X86/combine-multiplies.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-multiplies.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-multiplies.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-multiplies.ll Mon Apr 29 10:50:10 2019
@@ -38,10 +38,10 @@ define void @testCombineMultiplies([100
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    imull $400, %ecx, %edx # imm = 0x190
-; CHECK-NEXT:    leal (%eax,%edx), %esi
+; CHECK-NEXT:    leal (%edx,%eax), %esi
 ; CHECK-NEXT:    movl $11, 2020(%esi,%ecx,4)
-; CHECK-NEXT:    movl $22, 2080(%eax,%edx)
-; CHECK-NEXT:    movl $33, 10080(%eax,%edx)
+; CHECK-NEXT:    movl $22, 2080(%edx,%eax)
+; CHECK-NEXT:    movl $33, 10080(%edx,%eax)
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    retl
 entry:

Modified: llvm/trunk/test/CodeGen/X86/load-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/load-combine.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/load-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/load-combine.ll Mon Apr 29 10:50:10 2019
@@ -966,7 +966,7 @@ define i32 @load_i32_by_i8_base_offset_i
 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
 ; CHECK64:       # %bb.0:
 ; CHECK64-NEXT:    movl %esi, %eax
-; CHECK64-NEXT:    movl 13(%rdi,%rax), %eax
+; CHECK64-NEXT:    movl 13(%rax,%rdi), %eax
 ; CHECK64-NEXT:    retq
   %tmp = add nuw nsw i32 %i, 4
   %tmp2 = add nuw nsw i32 %i, 3
@@ -1016,7 +1016,7 @@ define i32 @load_i32_by_i8_zaext_loads(i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
+; CHECK-NEXT:    movl 12(%ecx,%eax), %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
@@ -1072,7 +1072,7 @@ define i32 @load_i32_by_i8_zsext_loads(i
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl 12(%eax,%ecx), %eax
+; CHECK-NEXT:    movl 12(%ecx,%eax), %eax
 ; CHECK-NEXT:    retl
 ;
 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads:

Modified: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Mon Apr 29 10:50:10 2019
@@ -66,7 +66,7 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movzbl 2(%r8,%rbx,4), %ebx
 ; GENERIC-NEXT:    shll $16, %ebx
 ; GENERIC-NEXT:    orl %eax, %ebx
-; GENERIC-NEXT:    xorl 16(%rdx,%rcx), %ebx
+; GENERIC-NEXT:    xorl 16(%rcx,%rdx), %ebx
 ; GENERIC-NEXT:    shrl $8, %edi
 ; GENERIC-NEXT:    movzbl 3(%r9,%rdi,4), %eax
 ; GENERIC-NEXT:    shll $24, %eax
@@ -74,7 +74,7 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movzbl 2(%r8,%rdi,4), %edi
 ; GENERIC-NEXT:    shll $16, %edi
 ; GENERIC-NEXT:    orl %eax, %edi
-; GENERIC-NEXT:    xorl 20(%rdx,%rcx), %edi
+; GENERIC-NEXT:    xorl 20(%rcx,%rdx), %edi
 ; GENERIC-NEXT:    movl %ebx, %eax
 ; GENERIC-NEXT:    shrl $24, %eax
 ; GENERIC-NEXT:    movb %al, (%rsi)
@@ -156,8 +156,8 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    shll $16, %eax
 ; ATOM-NEXT:    orl %edi, %ebp
 ; ATOM-NEXT:    orl %r15d, %eax
-; ATOM-NEXT:    xorl 20(%rdx,%rcx), %ebp
-; ATOM-NEXT:    xorl 16(%rdx,%rcx), %eax
+; ATOM-NEXT:    xorl 20(%rcx,%rdx), %ebp
+; ATOM-NEXT:    xorl 16(%rcx,%rdx), %eax
 ; ATOM-NEXT:    movl %eax, %edi
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $24, %edi

Modified: llvm/trunk/test/CodeGen/X86/merge_store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge_store.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge_store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge_store.ll Mon Apr 29 10:50:10 2019
@@ -44,7 +44,7 @@ entry:
 define void @indexed_store_merge(i64 %p, i8* %v) {
 ; CHECK-LABEL: indexed_store_merge:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $0, 2(%rsi,%rdi)
+; CHECK-NEXT:    movl $0, 2(%rdi,%rsi)
 ; CHECK-NEXT:    movb $0, (%rsi)
 ; CHECK-NEXT:    retq
 entry:

Modified: llvm/trunk/test/CodeGen/X86/sad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sad.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sad.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sad.ll Mon Apr 29 10:50:10 2019
@@ -1403,18 +1403,18 @@ define i32 @sad_unroll_nonzero_initial(<
 ; SSE2-NEXT:    movdqu (%rdi), %xmm0
 ; SSE2-NEXT:    movdqu (%rsi), %xmm1
 ; SSE2-NEXT:    psadbw %xmm0, %xmm1
+; SSE2-NEXT:    movdqu (%rdx), %xmm0
+; SSE2-NEXT:    movdqu (%rcx), %xmm2
+; SSE2-NEXT:    psadbw %xmm0, %xmm2
 ; SSE2-NEXT:    movl $1, %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    paddd %xmm1, %xmm0
-; SSE2-NEXT:    movdqu (%rdx), %xmm1
-; SSE2-NEXT:    movdqu (%rcx), %xmm2
-; SSE2-NEXT:    psadbw %xmm1, %xmm2
-; SSE2-NEXT:    paddd %xmm0, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
 ; SSE2-NEXT:    paddd %xmm2, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE2-NEXT:    paddd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    paddd %xmm0, %xmm1
-; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    paddd %xmm1, %xmm0
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: sad_unroll_nonzero_initial:
@@ -1425,8 +1425,8 @@ define i32 @sad_unroll_nonzero_initial(<
 ; AVX1-NEXT:    vpsadbw (%rcx), %xmm1, %xmm1
 ; AVX1-NEXT:    movl $1, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm2
-; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1438,12 +1438,12 @@ define i32 @sad_unroll_nonzero_initial(<
 ; AVX2:       # %bb.0: # %bb
 ; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX2-NEXT:    vpsadbw (%rsi), %xmm0, %xmm0
-; AVX2-NEXT:    movl $1, %eax
-; AVX2-NEXT:    vmovd %eax, %xmm1
-; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vmovdqu (%rdx), %xmm1
 ; AVX2-NEXT:    vpsadbw (%rcx), %xmm1, %xmm1
-; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    movl $1, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm2
+; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
@@ -1458,12 +1458,12 @@ define i32 @sad_unroll_nonzero_initial(<
 ; AVX512:       # %bb.0: # %bb
 ; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX512-NEXT:    vpsadbw (%rsi), %xmm0, %xmm0
-; AVX512-NEXT:    movl $1, %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vmovdqu (%rdx), %xmm1
 ; AVX512-NEXT:    vpsadbw (%rcx), %xmm1, %xmm1
-; AVX512-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT:    movl $1, %eax
+; AVX512-NEXT:    vmovd %eax, %xmm2
+; AVX512-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1

Modified: llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll?rev=359476&r1=359475&r2=359476&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-ext-logic.ll Mon Apr 29 10:50:10 2019
@@ -146,7 +146,7 @@ define <8 x i16> @zext_and_v8i16(<8 x i8
 ;
 ; AVX2-LABEL: zext_and_v8i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vandps %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2-NEXT:    retq
   %xz = zext <8 x i8> %x to <8 x i16>




More information about the llvm-commits mailing list