[PATCH] D127392: [AggressiveInstCombine] Combine consecutive loads which are being merged to form a wider load.

Mon Sep 26 11:01:52 PDT 2022

aeubanks added a comment.

It turned out to be a function that we always compile with -O3...

Anyway, here is a reduced test case:

  $ cat /tmp/a.ll
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"                                                                                                       
  target triple = "x86_64-grtev4-linux-gnu"                                                                                                                                                          

  define i64 @eggs(ptr noundef readonly %arg) {                                                                                                                                                      
    %tmp3 = load i8, ptr %arg, align 1                                                             
    %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1                                             
    %tmp5 = load i8, ptr %tmp4, align 1                                                            
    %tmp6 = getelementptr inbounds i8, ptr %arg, i64 2                                             
    %tmp7 = load i8, ptr %tmp6, align 1                                                            
    %tmp8 = getelementptr inbounds i8, ptr %arg, i64 3                                             
    %tmp9 = load i8, ptr %tmp8, align 1                                                            
    %tmp10 = getelementptr inbounds i8, ptr %arg, i64 4                                            
    %tmp11 = load i8, ptr %tmp10, align 1                                                          
    %tmp12 = getelementptr inbounds i8, ptr %arg, i64 5                                            
    %tmp13 = load i8, ptr %tmp12, align 1                                                          
    %tmp14 = getelementptr inbounds i8, ptr %arg, i64 6                                            
    %tmp15 = load i8, ptr %tmp14, align 1                                                          
    %tmp16 = getelementptr inbounds i8, ptr %arg, i64 7                                            
    %tmp17 = load i8, ptr %tmp16, align 1
    %tmp18 = zext i8 %tmp17 to i64     
    %tmp19 = shl nuw i64 %tmp18, 56    
    %tmp20 = zext i8 %tmp15 to i64     
    %tmp21 = shl nuw nsw i64 %tmp20, 48
    %tmp22 = or i64 %tmp19, %tmp21     
    %tmp23 = zext i8 %tmp13 to i64     
    %tmp24 = shl nuw nsw i64 %tmp23, 40
    %tmp25 = or i64 %tmp22, %tmp24     
    %tmp26 = zext i8 %tmp11 to i64     
    %tmp27 = shl nuw nsw i64 %tmp26, 32
    %tmp28 = or i64 %tmp25, %tmp27     
    %tmp29 = zext i8 %tmp9 to i64      
    %tmp30 = shl nuw nsw i64 %tmp29, 24
    %tmp31 = or i64 %tmp28, %tmp30     
    %tmp32 = zext i8 %tmp7 to i64      
    %tmp33 = shl nuw nsw i64 %tmp32, 16
    %tmp34 = zext i8 %tmp5 to i64     
    %tmp35 = shl nuw nsw i64 %tmp34, 8
    %tmp36 = or i64 %tmp31, %tmp33
    %tmp37 = zext i8 %tmp3 to i64 
    %tmp38 = or i64 %tmp36, %tmp35                                                                                                                                                                   
    %tmp39 = or i64 %tmp38, %tmp37                                                                                                                                                                   
    ret i64 %tmp39                                                                                                                                                                                   
  }
  $ ./build/rel/bin/opt -passes=aggressive-instcombine -S /tmp/a.ll
  ; ModuleID = '/tmp/b.ll'
  source_filename = "/tmp/a.ll"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-grtev4-linux-gnu"

  define i64 @eggs(ptr noundef readonly %arg) {
    %tmp3 = load i8, ptr %arg, align 1
    %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
    %tmp5 = load i32, ptr %tmp4, align 1
    %1 = zext i32 %tmp5 to i64
    %2 = shl i64 %1, 8
    %tmp37 = zext i8 %tmp3 to i64
    %tmp39 = or i64 %2, %tmp37
    ret i64 %tmp39
  }

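That does look wrong: the input assembles all eight bytes at `%arg`, so it should be optimized to a single `load i64`. Instead we get `(zext(load i32 (%arg + 1)) << 8) | zext(load i8 %arg)`, which only covers bytes 0-4 and drops bytes 5-7.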

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127392/new/

https://reviews.llvm.org/D127392