[llvm] 5d79a8d - [X86] fold-and-shift.ll - add x86-64 test coverage

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 23 14:17:02 PDT 2023


Author: Simon Pilgrim
Date: 2023-08-23T22:16:54+01:00
New Revision: 5d79a8d148418da0565e6aa665178e6f095dd73e

URL: https://github.com/llvm/llvm-project/commit/5d79a8d148418da0565e6aa665178e6f095dd73e
DIFF: https://github.com/llvm/llvm-project/commit/5d79a8d148418da0565e6aa665178e6f095dd73e.diff

LOG: [X86] fold-and-shift.ll - add x86-64 test coverage

Although we already have fold-and-shift-x86_64.ll, this adds additional test coverage for various and-shift patterns, split by sign/zero extensions of i32 indices used with i64 pointers.
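For context, the kind of pattern the new x86-64 RUN line exercises looks roughly like the sketch below (illustrative only, not taken from the patch; the function name @zext_index_sketch is hypothetical). An i32 index is shifted and masked, then zero-extended before being used as a 64-bit GEP offset, mirroring the spirit of the existing t1 pattern:

; Illustrative sketch only - an and+shift index zero-extended to i64 before
; addressing a 64-bit pointer.
; RUN: llc < %s -mtriple=x86_64--
define i32 @zext_index_sketch(ptr %X, i32 %i) {
entry:
  %shl = shl i32 %i, 2
  %masked = and i32 %shl, 1020
  %idx = zext i32 %masked to i64
  %addr = getelementptr inbounds i8, ptr %X, i64 %idx
  %val = load i32, ptr %addr
  ret i32 %val
}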

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/fold-and-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll
index 1318de65df3022..41adab63a11a6f 100644
--- a/llvm/test/CodeGen/X86/fold-and-shift.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift.ll
@@ -1,15 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-- | FileCheck %s
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
 
 define i32 @t1(ptr %X, i32 %i) {
-; CHECK-LABEL: t1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movzbl %cl, %ecx
-; CHECK-NEXT:    movl (%eax,%ecx,4), %eax
-; CHECK-NEXT:    retl
-
+; X86-LABEL: t1:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl %cl, %ecx
+; X86-NEXT:    movl (%eax,%ecx,4), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t1:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    movl (%rdi,%rax,4), %eax
+; X64-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 2
   %tmp4 = and i32 %tmp2, 1020
@@ -19,14 +25,20 @@ entry:
 }
 
 define i32 @t2(ptr %X, i32 %i) {
-; CHECK-LABEL: t2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movzwl %cx, %ecx
-; CHECK-NEXT:    movl (%eax,%ecx,4), %eax
-; CHECK-NEXT:    retl
-
+; X86-LABEL: t2:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzwl %cx, %ecx
+; X86-NEXT:    movl (%eax,%ecx,4), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t2:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    addl %eax, %eax
+; X64-NEXT:    movl (%rdi,%rax,2), %eax
+; X64-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 1
   %tmp4 = and i32 %tmp2, 131070
@@ -35,23 +47,30 @@ entry:
   ret i32 %tmp9
 }
 
-define i32 @t3(ptr %i.ptr, ptr %arr) {
 ; This case is tricky. The lshr followed by a gep will produce a lshr followed
 ; by an and to remove the low bits. This can be simplified by doing the lshr by
 ; a greater constant and using the addressing mode to scale the result back up.
 ; To make matters worse, because of the two-phase zext of %i and their reuse in
 ; the function, the DAG can get confusing trying to re-use both of them and
 ; prevent easy analysis of the mask in order to match this.
-; CHECK-LABEL: t3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movzwl (%eax), %eax
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    shrl $11, %edx
-; CHECK-NEXT:    addl (%ecx,%edx,4), %eax
-; CHECK-NEXT:    retl
-
+define i32 @t3(ptr %i.ptr, ptr %arr) {
+; X86-LABEL: t3:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl (%eax), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrl $11, %edx
+; X86-NEXT:    addl (%ecx,%edx,4), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t3:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movzwl (%rdi), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shrl $11, %ecx
+; X64-NEXT:    addl (%rsi,%rcx,4), %eax
+; X64-NEXT:    retq
 entry:
   %i = load i16, ptr %i.ptr
   %i.zext = zext i16 %i to i32
@@ -62,20 +81,28 @@ entry:
   ret i32 %sum
 }
 
-define i32 @t4(ptr %i.ptr, ptr %arr) {
 ; A version of @t3 that has more zero extends and more re-use of intermediate
 ; values. This exercises slightly different bits of canonicalization.
-; CHECK-LABEL: t4:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movzwl (%eax), %eax
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    shrl $11, %edx
-; CHECK-NEXT:    addl (%ecx,%edx,4), %eax
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retl
-
+define i32 @t4(ptr %i.ptr, ptr %arr) {
+; X86-LABEL: t4:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl (%eax), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shrl $11, %edx
+; X86-NEXT:    addl (%ecx,%edx,4), %eax
+; X86-NEXT:    addl %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t4:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movzwl (%rdi), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shrl $11, %ecx
+; X64-NEXT:    addl (%rsi,%rcx,4), %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    retq
 entry:
   %i = load i16, ptr %i.ptr
   %i.zext = zext i16 %i to i32
@@ -89,14 +116,21 @@ entry:
 }
 
 define i8 @t5(ptr %X, i32 %i) {
-; CHECK-LABEL: t5:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    andl $-14, %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,4), %eax
-; CHECK-NEXT:    retl
-
+; X86-LABEL: t5:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    andl $-14, %ecx
+; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t5:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    andl $-56, %esi
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movzbl (%rdi,%rax), %eax
+; X64-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 2
   %tmp4 = and i32 %tmp2, -56
@@ -106,14 +140,21 @@ entry:
 }
 
 define i8 @t6(ptr %X, i32 %i) {
-; CHECK-LABEL: t6:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl $-255, %ecx
-; CHECK-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movzbl (%eax,%ecx,4), %eax
-; CHECK-NEXT:    retl
-
+; X86-LABEL: t6:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl $-255, %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t6:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    shll $2, %esi
+; X64-NEXT:    andl $-1020, %esi # imm = 0xFC04
+; X64-NEXT:    movslq %esi, %rax
+; X64-NEXT:    movzbl (%rdi,%rax), %eax
+; X64-NEXT:    retq
 entry:
   %tmp2 = shl i32 %i, 2
   %tmp4 = and i32 %tmp2, -1020
