; Sum of absolute differences (SAD) over 16 consecutive bytes.
;
; Reads 16 bytes from %pix1 and 16 bytes from %pix2 as four <4 x i8> groups
; (byte offsets 0, 4, 8 and 12), computes the per-byte absolute difference in
; 32-bit lanes, accumulates the four groups lane-wise, and finally sums the
; four lanes into a single i32 result.
;
; Parameters:
;   %pix1, %pix2 - pointers to the two byte buffers; only read, never captured.
; Returns:
;   i32 - the sum of |pix1[i] - pix2[i]| for i in 0..15.
define i32 @sad(i8* nocapture readonly %pix1, i8* nocapture readonly %pix2) #0 {
entry:
  br label %vector.body

vector.body:                                      ; preds = %entry
  ; --- bytes 0..3 ---
  %0 = bitcast i8* %pix1 to <4 x i8>*
  %wide.load = load <4 x i8>* %0, align 1
  %1 = zext <4 x i8> %wide.load to <4 x i32>
  %2 = bitcast i8* %pix2 to <4 x i8>*
  %wide.load7 = load <4 x i8>* %2, align 1
  %3 = zext <4 x i8> %wide.load7 to <4 x i32>
  ; abs(a - b): keep the difference where a >= b (unsigned), otherwise negate it.
  %4 = sub nsw <4 x i32> %1, %3
  %5 = icmp uge <4 x i8> %wide.load, %wide.load7
  %6 = sub nsw <4 x i32> zeroinitializer, %4
  %7 = select <4 x i1> %5, <4 x i32> %4, <4 x i32> %6

  ; --- bytes 4..7 ---
  %8 = getelementptr inbounds i8* %pix1, i64 4
  %9 = bitcast i8* %8 to <4 x i8>*
  %wide.load.1 = load <4 x i8>* %9, align 1
  %10 = zext <4 x i8> %wide.load.1 to <4 x i32>
  %11 = getelementptr inbounds i8* %pix2, i64 4
  %12 = bitcast i8* %11 to <4 x i8>*
  %wide.load7.1 = load <4 x i8>* %12, align 1
  %13 = zext <4 x i8> %wide.load7.1 to <4 x i32>
  %14 = sub nsw <4 x i32> %10, %13
  %15 = icmp uge <4 x i8> %wide.load.1, %wide.load7.1
  %16 = sub nsw <4 x i32> zeroinitializer, %14
  %17 = select <4 x i1> %15, <4 x i32> %14, <4 x i32> %16
  ; Running lane-wise accumulator: groups 0 and 1.
  %18 = add nsw <4 x i32> %17, %7

  ; --- bytes 8..11 ---
  %19 = getelementptr inbounds i8* %pix1, i64 8
  %20 = bitcast i8* %19 to <4 x i8>*
  %wide.load.2 = load <4 x i8>* %20, align 1
  %21 = zext <4 x i8> %wide.load.2 to <4 x i32>
  %22 = getelementptr inbounds i8* %pix2, i64 8
  %23 = bitcast i8* %22 to <4 x i8>*
  %wide.load7.2 = load <4 x i8>* %23, align 1
  %24 = zext <4 x i8> %wide.load7.2 to <4 x i32>
  %25 = sub nsw <4 x i32> %21, %24
  %26 = icmp uge <4 x i8> %wide.load.2, %wide.load7.2
  %27 = sub nsw <4 x i32> zeroinitializer, %25
  %28 = select <4 x i1> %26, <4 x i32> %25, <4 x i32> %27
  %29 = add nsw <4 x i32> %28, %18

  ; --- bytes 12..15 ---
  %30 = getelementptr inbounds i8* %pix1, i64 12
  %31 = bitcast i8* %30 to <4 x i8>*
  %wide.load.3 = load <4 x i8>* %31, align 1
  %32 = zext <4 x i8> %wide.load.3 to <4 x i32>
  %33 = getelementptr inbounds i8* %pix2, i64 12
  %34 = bitcast i8* %33 to <4 x i8>*
  %wide.load7.3 = load <4 x i8>* %34, align 1
  %35 = zext <4 x i8> %wide.load7.3 to <4 x i32>
  %36 = sub nsw <4 x i32> %32, %35
  %37 = icmp uge <4 x i8> %wide.load.3, %wide.load7.3
  %38 = sub nsw <4 x i32> zeroinitializer, %36
  %39 = select <4 x i1> %37, <4 x i32> %36, <4 x i32> %38
  %40 = add nsw <4 x i32> %39, %29

  ; --- horizontal reduction of the four accumulator lanes ---
  ; NOTE(review): the shuffle-mask operands were missing from the original
  ; text (making the IR unparseable). They are restored here as the only masks
  ; under which lane 0 ends up holding the sum of all four lanes - confirm
  ; against the upstream copy of this file if one is available.
  ;
  ; Step 1: move lanes 2,3 down to lanes 0,1 and add -> lanes 0,1 hold
  ; (l0 + l2), (l1 + l3).
  %rdx.shuf = shufflevector <4 x i32> %40, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx = add <4 x i32> %40, %rdx.shuf
  ; Step 2: move lane 1 down to lane 0 and add -> lane 0 holds the full sum.
  %rdx.shuf8 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx9 = add <4 x i32> %bin.rdx, %rdx.shuf8
  %41 = extractelement <4 x i32> %bin.rdx9, i32 0
  ret i32 %41
}