[PATCH] D69044: [X86] Allow up to 4 loads per inline memcmp()

Sat Oct 19 09:44:00 PDT 2019

craig.topper added a comment.

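The negative offsets on length256_eq are coming from the memcmp expansion IR itself, not from later codegen. The GEP indices are emitted with type i8, so the expected byte offsets 128 and 192 (for the third and fourth 64-byte loads) wrap around to -128 and -64.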

  *** IR Dump After Expand memcmp() to load/stores ***

  define i1 @length256_eq(i8* %x, i8* %y) #0 {
    %1 = bitcast i8* %x to i512*
    %2 = bitcast i8* %y to i512*
    %3 = load i512, i512* %1
    %4 = load i512, i512* %2
    %5 = xor i512 %3, %4
    %6 = getelementptr i8, i8* %x, i8 64
    %7 = bitcast i8* %6 to i512*
    %8 = getelementptr i8, i8* %y, i8 64
    %9 = bitcast i8* %8 to i512*
    %10 = load i512, i512* %7
    %11 = load i512, i512* %9
    %12 = xor i512 %10, %11
    %13 = getelementptr i8, i8* %x, i8 -128
    %14 = bitcast i8* %13 to i512*
    %15 = getelementptr i8, i8* %y, i8 -128
    %16 = bitcast i8* %15 to i512*
    %17 = load i512, i512* %14
    %18 = load i512, i512* %16
    %19 = xor i512 %17, %18
    %20 = getelementptr i8, i8* %x, i8 -64
    %21 = bitcast i8* %20 to i512*
    %22 = getelementptr i8, i8* %y, i8 -64
    %23 = bitcast i8* %22 to i512*
    %24 = load i512, i512* %21
    %25 = load i512, i512* %23
    %26 = xor i512 %24, %25
    %27 = or i512 %5, %12
    %28 = or i512 %19, %26
    %29 = or i512 %27, %28
    %30 = icmp ne i512 %29, 0
    %31 = zext i1 %30 to i32
    %cmp = icmp ne i32 %31, 0
    ret i1 %cmp
  }

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69044/new/

https://reviews.llvm.org/D69044